Diffstat (limited to 'sql')
147 files changed, 53802 insertions, 9804 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am index d701c18a4d7..b0824110792 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -19,8 +19,8 @@ MYSQLDATAdir = $(localstatedir) MYSQLSHAREdir = $(pkgdatadir) MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) INCLUDES = @ZLIB_INCLUDES@ \ - @bdb_includes@ @innodb_includes@ @ndbcluster_includes@ \ -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_srcdir)/regex -I$(srcdir) $(yassl_includes) \ $(openssl_includes) @@ -30,48 +30,48 @@ libexec_PROGRAMS = mysqld noinst_PROGRAMS = gen_lex_hash bin_PROGRAMS = mysql_tzinfo_to_sql gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@ -LDADD = $(top_builddir)/myisam/libmyisam.a \ - $(top_builddir)/myisammrg/libmyisammrg.a \ - $(top_builddir)/heap/libheap.a \ +LDADD = $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/storage/myisammrg/libmyisammrg.a \ + $(top_builddir)/storage/heap/libheap.a \ $(top_builddir)/vio/libvio.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/regex/libregex.a \ - $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ @NDB_SCI_LIBS@ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ mysqld_LDADD = @MYSQLD_EXTRA_LDFLAGS@ \ - @bdb_libs@ @innodb_libs@ @pstack_libs@ \ - @innodb_system_libs@ \ - @ndbcluster_libs@ @ndbcluster_system_libs@ \ + @pstack_libs@ \ + @mysql_se_objs@ @mysql_se_libs@ \ $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \ @yassl_libs@ @openssl_libs@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ item_strfunc.h item_timefunc.h item_uniq.h \ + item_xmlfunc.h \ item_create.h item_subselect.h item_row.h \ mysql_priv.h item_geofunc.h sql_bitmap.h \ procedure.h sql_class.h sql_lex.h sql_list.h \ sql_manager.h sql_map.h sql_string.h unireg.h \ sql_error.h field.h handler.h mysqld_suffix.h \ - ha_myisammrg.h\ - ha_heap.h ha_myisam.h ha_berkeley.h ha_innodb.h \ - ha_ndbcluster.h opt_range.h protocol.h \ + ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \ + opt_range.h protocol.h rpl_tblmap.h \ + log.h sql_show.h rpl_rli.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h\ lex.h lex_symbol.h sql_acl.h sql_crypt.h \ - log_event.h sql_repl.h slave.h \ + log_event.h sql_repl.h slave.h rpl_filter.h \ + rpl_injector.h \ stacktrace.h sql_sort.h sql_cache.h set_var.h \ spatial.h gstream.h client_settings.h tzfile.h \ tztime.h my_decimal.h\ sp_head.h sp_pcontext.h sp_rcontext.h sp.h sp_cache.h \ parse_file.h sql_view.h sql_trigger.h \ - sql_array.h sql_cursor.h \ - examples/ha_example.h ha_archive.h \ - examples/ha_tina.h ha_blackhole.h \ - ha_federated.h -mysqld_SOURCES = sql_lex.cc sql_handler.cc \ + sql_array.h sql_cursor.h event.h event_priv.h \ + sql_plugin.h authors.h sql_partition.h \ + partition_info.h partition_element.h +mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ thr_malloc.cc item_create.cc item_subselect.cc \ - item_row.cc item_geofunc.cc \ + item_row.cc item_geofunc.cc item_xmlfunc.cc \ field.cc strfunc.cc key.cc sql_class.cc sql_list.cc \ net_serv.cc protocol.cc sql_state.c \ lock.cc my_lock.c \ @@ -86,13 +86,13 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ unireg.cc des_key_file.cc \ discover.cc time.cc opt_range.cc opt_sum.cc \ records.cc filesort.cc handler.cc \ - ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ - ha_berkeley.cc ha_innodb.cc \ - ha_ndbcluster.cc \ + ha_heap.cc ha_myisam.cc ha_myisammrg.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ sql_load.cc mf_iocache.cc 
field_conv.cc sql_show.cc \ sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \ - slave.cc sql_repl.cc sql_union.cc sql_derived.cc \ + slave.cc sql_repl.cc rpl_filter.cc rpl_tblmap.cc \ + rpl_injector.cc \ + sql_union.cc sql_derived.cc \ client.c sql_client.cc mini_client_errors.c pack.c\ stacktrace.c repl_failsafe.h repl_failsafe.cc \ sql_olap.cc sql_view.cc \ @@ -100,10 +100,17 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \ tztime.cc my_time.c my_user.c my_decimal.cc\ sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \ sp_cache.cc parse_file.cc sql_trigger.cc \ - examples/ha_example.cc ha_archive.cc \ - examples/ha_tina.cc ha_blackhole.cc \ - ha_federated.cc - + event_executor.cc event.cc event_timed.cc \ + sql_plugin.cc sql_binlog.cc \ + handlerton.cc sql_tablespace.cc partition_info.cc +EXTRA_mysqld_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \ + ha_innodb.h ha_berkeley.h ha_archive.h \ + ha_blackhole.cc ha_federated.cc ha_ndbcluster.cc \ + ha_blackhole.h ha_federated.h ha_ndbcluster.h \ + ha_ndbcluster_binlog.cc ha_ndbcluster_binlog.h \ + ha_ndbcluster_tables.h \ + ha_partition.cc ha_partition.h +mysqld_DEPENDENCIES = @mysql_se_objs@ gen_lex_hash_SOURCES = gen_lex_hash.cc gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS) mysql_tzinfo_to_sql_SOURCES = mysql_tzinfo_to_sql.cc @@ -113,11 +120,12 @@ DEFS = -DMYSQL_SERVER \ -DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \ -DDATADIR="\"$(MYSQLDATAdir)\"" \ -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ + -DLIBDIR="\"$(MYSQLLIBdir)\"" \ @DEFS@ BUILT_SOURCES = sql_yacc.cc sql_yacc.h lex_hash.h -EXTRA_DIST = udf_example.cc $(BUILT_SOURCES) -DISTCLEANFILES = lex_hash.h +EXTRA_DIST = udf_example.cc handlerton-win.cc $(BUILT_SOURCES) +CLEANFILES = lex_hash.h sql_yacc.cc sql_yacc.h AM_YFLAGS = -d mysql_tzinfo_to_sql.cc: @@ -153,6 +161,19 @@ sql_yacc.o: sql_yacc.cc sql_yacc.h $(HEADERS) lex_hash.h: gen_lex_hash$(EXEEXT) ./gen_lex_hash$(EXEEXT) > $@ +ha_berkeley.o: ha_berkeley.cc ha_berkeley.h + $(CXXCOMPILE) @bdb_includes@ $(LM_CFLAGS) -c $< + +ha_ndbcluster.o:ha_ndbcluster.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(LM_CFLAGS) -c $< + +ha_ndbcluster_binlog.o:ha_ndbcluster_binlog.cc ha_ndbcluster_binlog.h + $(CXXCOMPILE) @ndbcluster_includes@ $(LM_CFLAGS) -c $< + +#Until we can get rid of dependencies on ha_ndbcluster.h +handler.o: handler.cc ha_ndbcluster.h + $(CXXCOMPILE) @ndbcluster_includes@ $(CXXFLAGS) -c $< + # For testing of udf_example.so; Works on platforms with gcc # (This is not part of our build process but only provided as an example) udf_example.so: udf_example.cc diff --git a/sql/authors.h b/sql/authors.h new file mode 100644 index 00000000000..fde1806f4be --- /dev/null +++ b/sql/authors.h @@ -0,0 +1,146 @@ +/* Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Structure of the name list */ + +struct show_table_authors_st { + const char *name; + const char *location; + const char *comment; +}; + +/* + Output from "SHOW AUTHORS" + + If you can update it, you get to be in it :) + + Don't be offended if your name is not in here, just add it! + + IMPORTANT: Names should be added in alphabetical order (by last name). + + Names should be encoded using UTF-8. +*/ + +struct show_table_authors_st show_table_authors[]= { + { "Brian (Krow) Aker", "Seattle, WA, USA", + "Architecture, archive, federated, bunch of little stuff :)" }, + { "Venu Anuganti", "", "Client/server protocol (4.1)" }, + { "David Axmark", "Uppsala, Sweden", + "Small stuff long time ago, Monty ripped it out!" }, + { "Alexander (Bar) Barkov", "Izhevsk, Russia", + "Unicode and character sets (4.1)" }, + { "Omer BarNir", "Sunnyvale, CA, USA", + "Testing (sometimes) and general QA stuff" }, + { "Guilhem Bichot", "Bordeaux, France", "Replication (since 4.0)" }, + { "John Birrell", "", "Emulation of pthread_mutex() for OS/2" }, + { "Andreas F. Bobak", "", "AGGREGATE extension to user-defined functions" }, + { "Alexey Botchkov (Holyfoot)", "Izhevsk, Russia", + "GIS extensions (4.1), embedded server (4.1), precision math (5.0)"}, + { "Reggie Burnett", "Nashville, TN, USA", "Windows development, Connectors" }, + { "Oleksandr Byelkin", "Lugansk, Ukraine", + "Query Cache (4.0), Subqueries (4.1), Views (5.0)" }, + { "Kent Boortz", "Orebro, Sweden", "Test platform, and general build stuff" }, + { "Tim Bunce", "", "mysqlhotcopy" }, + { "Yves Carlier", "", "mysqlaccess" }, + { "Joshua Chamas", "Cupertino, CA, USA", + "Concurrent insert, extended date syntax" }, + { "Petr Chardin", "Moscow, Russia", "Instance Manager (5.0)" }, + { "Wei-Jou Chen", "", "Chinese (Big5) character set" }, + { "Albert Chin-A-Young", "", + "Tru64 port, large file support, better TCP wrappers support" }, + { "Jorge del Conde", "Mexico City, Mexico", "Windows development" }, + { "Antony T. Curtis", "Norwalk, CA, USA", + "Parser, port to OS/2, storage engines and some random stuff" }, + { "Yuri Dario", "", "OS/2 port" }, + { "Sergei Golubchik", "Kerpen, Germany", + "Full-text search, precision math" }, + { "Lenz Grimmer", "Hamburg, Germany", + "Production (build and release) engineering" }, + { "Nikolay Grishakin", "Austin, TX, USA", "Testing - Server" }, + { "Wei He", "", "Chinese (GBK) character set" }, + { "Eric Herman", "Amsterdam, Netherlands", "Bug fixing - federated" }, + { "Alexander (Alexi) Ivanov", "St. 
Petersburg, Russia", "Replication" }, + { "Alexander (Salle) Keremidarski", "Sofia, Bulgaria", + "Bug fixing" }, + { "Mats Kindahl", "Storvreta, Sweden", "Replication" }, + { "Serge Kozlov", "Velikie Luki, Russia", "Testing - Cluster" }, + { "Greg (Groggy) Lehey", "Uchunga, SA, Australia", "Backup" }, + { "Matthias Leich", "Berlin, Germany", "Testing - Server" }, + { "Dmitri Lenev", "Moscow, Russia", + "Time zones support (4.1), Triggers (5.0)" }, + { "Marc Liyanage", "", "Created Mac OS X packages" }, + { "Zarko Mocnik", "", "Sorting for Slovenian language" }, + { "Per-Erik Martin", "Uppsala, Sweden", "Stored Procedures (5.0)" }, + { "Alexis Mikhailov", "", "User-defined functions" }, + { "Sinisa Milivojevic", "Larnaca, Cyprus", + "UNION (4.0), Subqueries in FROM clause (4.1), many other features" }, + { "Jonathan (Jeb) Miller", "Kyle, TX, USA", + "Testing - Cluster, Replication" }, + { "Elliot Murphy", "Cocoa, FL, USA", "Replication and backup" }, + { "Kristian Nielsen", "Copenhagen, Denmark", + "General build stuff" }, + { "Pekka Nouisiainen", "Stockholm, Sweden", + "NDB Cluster: BLOB support, character set support, ordered indexes" }, + { "Alexander Nozdrin", "Moscow, Russia", + "Bug fixing (Stored Procedures, 5.0)" }, + { "Per Eric Olsson", "", "Testing of dynamic record format" }, + { "Jonas Oreland", "Stockholm, Sweden", + "NDB Cluster, Online Backup, lots of other things" }, + { "Konstantin Osipov", "Moscow, Russia", + "Prepared statements (4.1), Cursors (5.0)" }, + { "Alexander (Sasha) Pachev", "Provo, UT, USA", + "Statement-based replication, SHOW CREATE TABLE, mysql-bench" }, + { "Irena Pancirov", "", "Port to Windows with Borland compiler" }, + { "Jan Pazdziora", "", "Czech sorting order" }, + { "Benjamin Pflugmann", "", + "Extended MERGE storage engine to handle INSERT" }, + { "Igor Romanenko", "", + "mysqldump" }, + { "Mikael Ronström", "Stockholm, Sweden", + "NDB Cluster, Partitioning (5.1), Optimizations" }, + { "Tõnu Samuel", "", + "VIO interface, other miscellaneous features" }, + { "Carsten Segieth (Pino)", "Fredersdorf, Germany", "Testing - Server"}, + { "Martin Sköld", "Stockholm, Sweden", + "NDB Cluster: Unique indexes, integration into MySQL" }, + { "Timothy Smith", "Auckland, New Zealand", + "Dynamic character sets, parts of the build system, libmysqld"}, + { "Miguel Solorzano", "Florianopolis, Santa Catarina, Brazil", + "Windows development, Windows NT service"}, + { "Punita Srivastava", "Austin, TX, USA", "Testing - Merlin"}, + { "Alexey Stroganov (Ranger)", "Lugansk, Ukraine", "Testing - Benchmarks"}, + { "Ingo Strüwing", "Berlin, Germany", "Bug fixing" }, + { "Magnus Svensson", "Öregrund, Sweden", + "NDB Cluster: Integration into MySQL, test framework" }, + { "Zeev Suraski", "", "FROM_UNIXTIME(), ENCRYPT()" }, + { "TAMITO", "", + "The _MB character set macros and UJIS and SJIS character sets" }, + { "Jani Tolonen", "Helsinki, Finland", + "mysqlimport, extensions to command-line clients, PROCEDURE ANALYSE()" }, + { "Lars Thalmann", "Stockholm, Sweden", + "Replication and cluster development" }, + { "Tomas Ulin", "Stockholm, Sweden", + "NDB Cluster: Configuration, installation" }, + { "Gianmassimo Vigazzola", "", "Initial Windows port" }, + { "Sergey Vojtovich", "Izhevsk, Russia", "Plugins infrastructure (5.1)" }, + { "Matt Wagner", "Northfield, MN, USA", "Bug fixing" }, + { "Jim Winstead Jr.", "Los Angeles, CA, USA", "Bug fixing" }, + { "Michael (Monty) Widenius", "Tusby, Finland", + "Lead developer and main author" }, + { "Peter Zaitsev", "Tacoma, WA, USA", + 
"SHA1(), AES_ENCRYPT(), AES_DECRYPT(), bug fixing" }, + {NULL, NULL, NULL} +}; diff --git a/sql/discover.cc b/sql/discover.cc index 1251055c70e..2a3da55f154 100644 --- a/sql/discover.cc +++ b/sql/discover.cc @@ -55,7 +55,8 @@ int readfrm(const char *name, *frmdata= NULL; // In case of errors *len= 0; error= 1; - if ((file=my_open(fn_format(index_file,name,"",reg_ext,4), + if ((file=my_open(fn_format(index_file,name,"",reg_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), O_RDONLY | O_SHARE, MYF(0))) < 0) goto err_end; @@ -112,7 +113,8 @@ int writefrm(const char *name, const void *frmdata, uint len) //DBUG_DUMP("frmdata", (char*)frmdata, len); error= 0; - if ((file=my_create(fn_format(index_file,name,"",reg_ext,4), + if ((file=my_create(fn_format(index_file,name,"",reg_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { if (my_write(file,(byte*)frmdata,len,MYF(MY_WME | MY_NABP))) diff --git a/sql/event.cc b/sql/event.cc new file mode 100644 index 00000000000..8eca4b1dcd3 --- /dev/null +++ b/sql/event.cc @@ -0,0 +1,1606 @@ +/* Copyright (C) 2004-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "event_priv.h" +#include "event.h" +#include "sp.h" + +/* + TODO list : + - The default value of created/modified should not be 0000-00-00 because of + STRICT mode restricions. + + - CREATE EVENT should not go into binary log! Does it now? The SQL statements + issued by the EVENT are replicated. + I have an idea how to solve the problem at failover. So the status field + will be ENUM('DISABLED', 'ENABLED', 'SLAVESIDE_DISABLED'). + In this case when CREATE EVENT is replicated it should go into the binary + as SLAVESIDE_DISABLED if it is ENABLED, when it's created as DISABLEd it + should be replicated as disabled. If an event is ALTERed as DISABLED the + query should go untouched into the binary log, when ALTERed as enable then + it should go as SLAVESIDE_DISABLED. This is regarding the SQL interface. + TT routines however modify mysql.event internally and this does not go the log + so in this case queries has to be injected into the log...somehow... or + maybe a solution is RBR for this case, because the event may go only from + ENABLED to DISABLED status change and this is safe for replicating. As well + an event may be deleted which is also safe for RBR. + + - Maybe move all allocations during parsing to evex_mem_root thus saving + double parsing in evex_create_event! + + - If the server is killed (stopping) try to kill executing events? + + - What happens if one renames an event in the DB while it is in memory? + Or even deleting it? + + - Consider using conditional variable when doing shutdown instead of + waiting till all worker threads end. 
+ + - Make Event_timed::get_show_create_event() work + + - Add logging to file + + - Move comparison code to class Event_timed + +Warning: + - For now parallel execution is not possible because the same sp_head cannot be + executed few times!!! There is still no lock attached to particular event. + +*/ + + +QUEUE EVEX_EQ_NAME; +MEM_ROOT evex_mem_root; +time_t mysql_event_last_create_time= 0L; + + +static TABLE_FIELD_W_TYPE event_table_fields[EVEX_FIELD_COUNT] = { + { + {(char *) STRING_WITH_LEN("db")}, + {(char *) STRING_WITH_LEN("char(64)")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("name")}, + {(char *) STRING_WITH_LEN("char(64)")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("body")}, + {(char *) STRING_WITH_LEN("longblob")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("definer")}, + {(char *) STRING_WITH_LEN("char(77)")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("execute_at")}, + {(char *) STRING_WITH_LEN("datetime")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("interval_value")}, + {(char *) STRING_WITH_LEN("int(11)")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("interval_field")}, + {(char *) STRING_WITH_LEN("enum('YEAR','QUARTER','MONTH','DAY'," + "'HOUR','MINUTE','WEEK','SECOND','MICROSECOND','YEAR_MONTH','DAY_HOUR'," + "'DAY_MINUTE','DAY_SECOND','HOUR_MINUTE','HOUR_SECOND','MINUTE_SECOND'," + "'DAY_MICROSECOND','HOUR_MICROSECOND','MINUTE_MICROSECOND'," + "'SECOND_MICROSECOND')")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("created")}, + {(char *) STRING_WITH_LEN("timestamp")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("modified")}, + {(char *) STRING_WITH_LEN("timestamp")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("last_executed")}, + {(char *) STRING_WITH_LEN("datetime")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("starts")}, + {(char *) STRING_WITH_LEN("datetime")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("ends")}, + {(char *) STRING_WITH_LEN("datetime")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("status")}, + {(char *) STRING_WITH_LEN("enum('ENABLED','DISABLED')")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("on_completion")}, + {(char *) STRING_WITH_LEN("enum('DROP','PRESERVE')")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("sql_mode")}, + {(char *) STRING_WITH_LEN("set('REAL_AS_FLOAT','PIPES_AS_CONCAT','ANSI_QUOTES'," + "'IGNORE_SPACE','NOT_USED','ONLY_FULL_GROUP_BY','NO_UNSIGNED_SUBTRACTION'," + "'NO_DIR_IN_CREATE','POSTGRESQL','ORACLE','MSSQL','DB2','MAXDB'," + "'NO_KEY_OPTIONS','NO_TABLE_OPTIONS','NO_FIELD_OPTIONS','MYSQL323','MYSQL40'," + "'ANSI','NO_AUTO_VALUE_ON_ZERO','NO_BACKSLASH_ESCAPES','STRICT_TRANS_TABLES'," + "'STRICT_ALL_TABLES','NO_ZERO_IN_DATE','NO_ZERO_DATE','INVALID_DATES'," + "'ERROR_FOR_DIVISION_BY_ZERO','TRADITIONAL','NO_AUTO_CREATE_USER'," + "'HIGH_NOT_PRECEDENCE')")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("comment")}, + {(char *) STRING_WITH_LEN("char(64)")}, + {(char *) STRING_WITH_LEN("utf8")} + } +}; + + +LEX_STRING interval_type_to_name[] = { + {(char *) STRING_WITH_LEN("YEAR")}, + {(char *) STRING_WITH_LEN("QUARTER")}, + {(char *) STRING_WITH_LEN("MONTH")}, + {(char *) STRING_WITH_LEN("DAY")}, + {(char *) STRING_WITH_LEN("HOUR")}, + {(char *) STRING_WITH_LEN("MINUTE")}, + {(char *) STRING_WITH_LEN("WEEK")}, + {(char *) STRING_WITH_LEN("SECOND")}, + {(char *) STRING_WITH_LEN("MICROSECOND")}, + {(char *) STRING_WITH_LEN("YEAR_MONTH")}, + {(char *) 
STRING_WITH_LEN("DAY_HOUR")}, + {(char *) STRING_WITH_LEN("DAY_MINUTE")}, + {(char *) STRING_WITH_LEN("DAY_SECOND")}, + {(char *) STRING_WITH_LEN("HOUR_MINUTE")}, + {(char *) STRING_WITH_LEN("HOUR_SECOND")}, + {(char *) STRING_WITH_LEN("MINUTE_SECOND")}, + {(char *) STRING_WITH_LEN("DAY_MICROSECOND")}, + {(char *) STRING_WITH_LEN("HOUR_MICROSECOND")}, + {(char *) STRING_WITH_LEN("MINUTE_MICROSECOND")}, + {(char *) STRING_WITH_LEN("SECOND_MICROSECOND")} +}; + + + +/* + Inits the scheduler queue - prioritized queue from mysys/queue.c + + Synopsis + evex_queue_init() + + queue - pointer the the memory to be initialized as queue. has to be + allocated from the caller + + Notes + During initialization the queue is sized for 30 events, and when is full + will auto extent with 30. +*/ + +void +evex_queue_init(EVEX_QUEUE_TYPE *queue) +{ + if (init_queue_ex(queue, 30 /*num_el*/, 0 /*offset*/, 0 /*smallest_on_top*/, + event_timed_compare_q, NULL, 30 /*auto_extent*/)) + sql_print_error("Insufficient memory to initialize executing queue."); +} + + +/* + Compares 2 LEX strings regarding case. + + Synopsis + my_time_compare() + + s - first LEX_STRING + t - second LEX_STRING + cs - charset + + RETURNS: + -1 - s < t + 0 - s == t + 1 - s > t + + Notes + TIME.second_part is not considered during comparison +*/ + +int sortcmp_lex_string(LEX_STRING s, LEX_STRING t, CHARSET_INFO *cs) +{ + return cs->coll->strnncollsp(cs, (unsigned char *) s.str,s.length, + (unsigned char *) t.str,t.length, 0); +} + + +/* + Compares 2 TIME structures + + Synopsis + my_time_compare() + + a - first TIME + b - second time + + RETURNS: + -1 - a < b + 0 - a == b + 1 - a > b + + Notes + TIME.second_part is not considered during comparison +*/ + +int +my_time_compare(TIME *a, TIME *b) +{ + +#ifdef ENABLE_WHEN_WE_HAVE_MILLISECOND_IN_TIMESTAMPS + my_ulonglong a_t= TIME_to_ulonglong_datetime(a)*100L + a->second_part; + my_ulonglong b_t= TIME_to_ulonglong_datetime(b)*100L + b->second_part; +#else + my_ulonglong a_t= TIME_to_ulonglong_datetime(a); + my_ulonglong b_t= TIME_to_ulonglong_datetime(b); +#endif + + if (a_t > b_t) + return 1; + else if (a_t < b_t) + return -1; + + return 0; +} + + +/* + Compares the execute_at members of 2 Event_timed instances + + Synopsis + event_timed_compare() + + a - first Event_timed object + b - second Event_timed object + + RETURNS: + -1 - a->execute_at < b->execute_at + 0 - a->execute_at == b->execute_at + 1 - a->execute_at > b->execute_at + + Notes + execute_at.second_part is not considered during comparison +*/ + +int +event_timed_compare(Event_timed *a, Event_timed *b) +{ + return my_time_compare(&a->execute_at, &b->execute_at); +} + + +/* + Compares the execute_at members of 2 Event_timed instances. + Used as callback for the prioritized queue when shifting + elements inside. 
+ + Synopsis + event_timed_compare() + + vptr - not used (set it to NULL) + a - first Event_timed object + b - second Event_timed object + + RETURNS: + -1 - a->execute_at < b->execute_at + 0 - a->execute_at == b->execute_at + 1 - a->execute_at > b->execute_at + + Notes + execute_at.second_part is not considered during comparison +*/ + +int +event_timed_compare_q(void *vptr, byte* a, byte *b) +{ + return event_timed_compare((Event_timed *)a, (Event_timed *)b); +} + + +/* + Reconstructs interval expression from interval type and expression + value that is in form of a value of the smalles entity: + For + YEAR_MONTH - expression is in months + DAY_MINUTE - expression is in minutes + + Synopsis + event_reconstruct_interval_expression() + buf - preallocated String buffer to add the value to + interval - the interval type (for instance YEAR_MONTH) + expression - the value in the lowest entity + + RETURNS + 0 - OK + 1 - Error +*/ + +int +event_reconstruct_interval_expression(String *buf, + interval_type interval, + longlong expression) +{ + ulonglong expr= expression; + char tmp_buff[128], *end; + bool close_quote= TRUE; + int multipl= 0; + char separator=':'; + + switch (interval) { + case INTERVAL_YEAR_MONTH: + multipl= 12; + separator= '-'; + goto common_1_lev_code; + case INTERVAL_DAY_HOUR: + multipl= 24; + separator= ' '; + goto common_1_lev_code; + case INTERVAL_HOUR_MINUTE: + case INTERVAL_MINUTE_SECOND: + multipl= 60; +common_1_lev_code: + buf->append('\''); + end= longlong10_to_str(expression/multipl, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff)); + expr= expr - (expr/multipl)*multipl; + break; + case INTERVAL_DAY_MINUTE: + { + ulonglong tmp_expr= expr; + + tmp_expr/=(24*60); + buf->append('\''); + end= longlong10_to_str(tmp_expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// days + buf->append(' '); + + tmp_expr= expr - tmp_expr*(24*60);//minutes left + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + } + break; + case INTERVAL_HOUR_SECOND: + { + ulonglong tmp_expr= expr; + + buf->append('\''); + end= longlong10_to_str(tmp_expr/3600, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + buf->append(':'); + + tmp_expr= tmp_expr - (tmp_expr/3600)*3600; + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// minutes + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + } + break; + case INTERVAL_DAY_SECOND: + { + ulonglong tmp_expr= expr; + + tmp_expr/=(24*3600); + buf->append('\''); + end= longlong10_to_str(tmp_expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// days + buf->append(' '); + + tmp_expr= expr - tmp_expr*(24*3600);//seconds left + end= longlong10_to_str(tmp_expr/3600, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// hours + buf->append(':'); + + tmp_expr= tmp_expr - (tmp_expr/3600)*3600; + end= longlong10_to_str(tmp_expr/60, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff));// minutes + + expr= tmp_expr - (tmp_expr/60)*60; + /* the code after the switch will finish */ + } + break; + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND"); + return 1; + break; + case INTERVAL_QUARTER: + 
expr/= 3; + close_quote= FALSE; + break; + case INTERVAL_WEEK: + expr/= 7; + default: + close_quote= FALSE; + break; + } + if (close_quote) + buf->append(separator); + end= longlong10_to_str(expr, tmp_buff, 10); + buf->append(tmp_buff, (uint) (end- tmp_buff)); + if (close_quote) + buf->append('\''); + + return 0; +} + + +/* + Open mysql.event table for read + + SYNOPSIS + evex_open_event_table_for_read() + thd Thread context + lock_type How to lock the table + table The table pointer + + RETURN + 1 Cannot lock table + 2 The table is corrupted - different number of fields + 0 OK +*/ + +int +evex_open_event_table(THD *thd, enum thr_lock_type lock_type, TABLE **table) +{ + TABLE_LIST tables; + DBUG_ENTER("open_proc_table"); + + bzero((char*) &tables, sizeof(tables)); + tables.db= (char*) "mysql"; + tables.table_name= tables.alias= (char*) "event"; + tables.lock_type= lock_type; + + if (simple_open_n_lock_tables(thd, &tables)) + DBUG_RETURN(1); + + if (table_check_intact(tables.table, EVEX_FIELD_COUNT, event_table_fields, + &mysql_event_last_create_time, + ER_CANNOT_LOAD_FROM_TABLE)) + { + close_thread_tables(thd); + DBUG_RETURN(2); + } + *table= tables.table; + + DBUG_RETURN(0); +} + + +/* + Find row in open mysql.event table representing event + + SYNOPSIS + evex_db_find_event_aux() + thd Thread context + et evet_timed object containing dbname, name & definer + table TABLE object for open mysql.event table. + + RETURN VALUE + 0 - Routine found + EVEX_KEY_NOT_FOUND - No routine with given name +*/ + +inline int +evex_db_find_event_aux(THD *thd, Event_timed *et, TABLE *table) +{ + return evex_db_find_event_by_name(thd, et->dbname, et->name, + et->definer, table); +} + + +/* + Find row in open mysql.event table representing event + + SYNOPSIS + evex_db_find_event_by_name() + thd Thread context + dbname Name of event's database + rname Name of the event inside the db + table TABLE object for open mysql.event table. + + RETURN VALUE + 0 - Routine found + EVEX_KEY_NOT_FOUND - No routine with given name +*/ + +int +evex_db_find_event_by_name(THD *thd, const LEX_STRING dbname, + const LEX_STRING ev_name, + const LEX_STRING user_name, + TABLE *table) +{ + byte key[MAX_KEY_LENGTH]; + DBUG_ENTER("evex_db_find_event_by_name"); + DBUG_PRINT("enter", ("name: %.*s", ev_name.length, ev_name.str)); + + /* + Create key to find row. We have to use field->store() to be able to + handle VARCHAR and CHAR fields. + Assumption here is that the two first fields in the table are + 'db' and 'name' and the first key is the primary key over the + same fields. + */ + if (dbname.length > table->field[EVEX_FIELD_DB]->field_length || + ev_name.length > table->field[EVEX_FIELD_NAME]->field_length || + user_name.length > table->field[EVEX_FIELD_DEFINER]->field_length) + + DBUG_RETURN(EVEX_KEY_NOT_FOUND); + + table->field[EVEX_FIELD_DB]->store(dbname.str, dbname.length, &my_charset_bin); + table->field[EVEX_FIELD_NAME]->store(ev_name.str, ev_name.length, + &my_charset_bin); + table->field[EVEX_FIELD_DEFINER]->store(user_name.str, user_name.length, + &my_charset_bin); + + key_copy(key, table->record[0], table->key_info, table->key_info->key_length); + + if (table->file->index_read_idx(table->record[0], 0, key, + table->key_info->key_length,HA_READ_KEY_EXACT)) + DBUG_RETURN(EVEX_KEY_NOT_FOUND); + + DBUG_RETURN(0); +} + + +/* + Puts some data common to CREATE and ALTER EVENT into a row. 
+ + SYNOPSIS + evex_fill_row() + thd THD + table the row to fill out + et Event's data + + Returns + 0 - ok + EVEX_GENERAL_ERROR - bad data + EVEX_GET_FIELD_FAILED - field count does not match. table corrupted? + + DESCRIPTION + Used both when an event is created and when it is altered. +*/ + +static int +evex_fill_row(THD *thd, TABLE *table, Event_timed *et, my_bool is_update) +{ + enum evex_table_field field_num; + + DBUG_ENTER("evex_fill_row"); + + DBUG_PRINT("info", ("dbname=[%s]", et->dbname.str)); + DBUG_PRINT("info", ("name =[%s]", et->name.str)); + DBUG_PRINT("info", ("body =[%s]", et->body.str)); + + if (table->field[field_num= EVEX_FIELD_DB]-> + store(et->dbname.str, et->dbname.length, system_charset_info)) + goto trunc_err; + + if (table->field[field_num= EVEX_FIELD_NAME]-> + store(et->name.str, et->name.length, system_charset_info)) + goto trunc_err; + + /* both ON_COMPLETION and STATUS are NOT NULL thus not calling set_notnull() */ + table->field[EVEX_FIELD_ON_COMPLETION]->store((longlong)et->on_completion, + true); + + table->field[EVEX_FIELD_STATUS]->store((longlong)et->status, true); + + /* + Change the SQL_MODE only if body was present in an ALTER EVENT and of course + always during CREATE EVENT. + */ + if (et->body.str) + { + table->field[EVEX_FIELD_SQL_MODE]->store((longlong)thd->variables.sql_mode, + true); + + if (table->field[field_num= EVEX_FIELD_BODY]-> + store(et->body.str, et->body.length, system_charset_info)) + goto trunc_err; + } + + if (et->expression) + { + table->field[EVEX_FIELD_INTERVAL_EXPR]->set_notnull(); + table->field[EVEX_FIELD_INTERVAL_EXPR]->store((longlong)et->expression,true); + + table->field[EVEX_FIELD_TRANSIENT_INTERVAL]->set_notnull(); + /* + In the enum (C) intervals start from 0 but in mysql enum valid values start + from 1. Thus +1 offset is needed! + */ + table->field[EVEX_FIELD_TRANSIENT_INTERVAL]->store((longlong)et->interval+1, + true); + + table->field[EVEX_FIELD_EXECUTE_AT]->set_null(); + + if (!et->starts_null) + { + table->field[EVEX_FIELD_STARTS]->set_notnull(); + table->field[EVEX_FIELD_STARTS]-> + store_time(&et->starts, MYSQL_TIMESTAMP_DATETIME); + } + + if (!et->ends_null) + { + table->field[EVEX_FIELD_ENDS]->set_notnull(); + table->field[EVEX_FIELD_ENDS]-> + store_time(&et->ends, MYSQL_TIMESTAMP_DATETIME); + } + } + else if (et->execute_at.year) + { + table->field[EVEX_FIELD_INTERVAL_EXPR]->set_null(); + table->field[EVEX_FIELD_TRANSIENT_INTERVAL]->set_null(); + table->field[EVEX_FIELD_STARTS]->set_null(); + table->field[EVEX_FIELD_ENDS]->set_null(); + + table->field[EVEX_FIELD_EXECUTE_AT]->set_notnull(); + table->field[EVEX_FIELD_EXECUTE_AT]->store_time(&et->execute_at, + MYSQL_TIMESTAMP_DATETIME); + } + else + { + DBUG_ASSERT(is_update); + /* + it is normal to be here when the action is update + this is an error if the action is create. 
something is borked + */ + } + + ((Field_timestamp *)table->field[EVEX_FIELD_MODIFIED])->set_time(); + + if (et->comment.str) + { + if (table->field[field_num= EVEX_FIELD_COMMENT]->store(et->comment.str, + et->comment.length, + system_charset_info)) + goto trunc_err; + } + + DBUG_RETURN(0); +trunc_err: + my_error(ER_EVENT_DATA_TOO_LONG, MYF(0), table->field[field_num]->field_name); + DBUG_RETURN(EVEX_GENERAL_ERROR); +} + + +/* + Creates an event in mysql.event + + SYNOPSIS + db_create_event() + thd THD + et Event_timed object containing information for the event + create_if_not - if an warning should be generated in case event exists + rows_affected - how many rows were affected + + Return value + 0 - OK + EVEX_GENERAL_ERROR - Failure + DESCRIPTION + Creates an event. Relies on evex_fill_row which is shared with + db_update_event. The name of the event is inside "et". +*/ + +static int +db_create_event(THD *thd, Event_timed *et, my_bool create_if_not, + uint *rows_affected) +{ + int ret= 0; + TABLE *table; + char olddb[128]; + bool dbchanged= false; + DBUG_ENTER("db_create_event"); + DBUG_PRINT("enter", ("name: %.*s", et->name.length, et->name.str)); + + *rows_affected= 0; + DBUG_PRINT("info", ("open mysql.event for update")); + if (evex_open_event_table(thd, TL_WRITE, &table)) + { + my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0)); + goto err; + } + + DBUG_PRINT("info", ("check existance of an event with the same name")); + if (!evex_db_find_event_aux(thd, et, table)) + { + if (create_if_not) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_EVENT_ALREADY_EXISTS, ER(ER_EVENT_ALREADY_EXISTS), + et->name.str); + goto ok; + } + my_error(ER_EVENT_ALREADY_EXISTS, MYF(0), et->name.str); + goto err; + } + + DBUG_PRINT("info", ("non-existant, go forward")); + if ((ret= sp_use_new_db(thd, et->dbname.str,olddb, sizeof(olddb),0, &dbchanged))) + { + my_error(ER_BAD_DB_ERROR, MYF(0)); + goto err; + } + + restore_record(table, s->default_values); // Get default values for fields + + if (system_charset_info->cset->numchars(system_charset_info, et->dbname.str, + et->dbname.str + et->dbname.length) + > EVEX_DB_FIELD_LEN) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), et->dbname.str); + goto err; + } + if (system_charset_info->cset->numchars(system_charset_info, et->name.str, + et->name.str + et->name.length) + > EVEX_DB_FIELD_LEN) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), et->name.str); + goto err; + } + + if (et->body.length > table->field[EVEX_FIELD_BODY]->field_length) + { + my_error(ER_TOO_LONG_BODY, MYF(0), et->name.str); + goto err; + } + + if (!(et->expression) && !(et->execute_at.year)) + { + DBUG_PRINT("error", ("neither expression nor execute_at are set!")); + my_error(ER_EVENT_NEITHER_M_EXPR_NOR_M_AT, MYF(0)); + goto err; + } + + if ((ret=table->field[EVEX_FIELD_DEFINER]->store(et->definer.str, + et->definer.length, + system_charset_info))) + { + my_error(ER_EVENT_STORE_FAILED, MYF(0), et->name.str, ret); + goto err; + } + + ((Field_timestamp *)table->field[EVEX_FIELD_CREATED])->set_time(); + + /* + evex_fill_row() calls my_error() in case of error so no need to + handle it here + */ + if ((ret= evex_fill_row(thd, table, et, false))) + goto err; + + if (table->file->ha_write_row(table->record[0])) + { + my_error(ER_EVENT_STORE_FAILED, MYF(0), et->name.str, ret); + goto err; + } + +#ifdef USE_THIS_CODE_AS_TEMPLATE_WHEN_EVENT_REPLICATION_IS_AGREED + if (mysql_bin_log.is_open()) + { + thd->clear_error(); + /* Such a statement can always go directly to binlog, no trans cache */ + 
thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); + } +#endif + + *rows_affected= 1; +ok: + if (dbchanged) + (void) mysql_change_db(thd, olddb, 1); + if (table) + close_thread_tables(thd); + DBUG_RETURN(EVEX_OK); + +err: + if (dbchanged) + (void) mysql_change_db(thd, olddb, 1); + if (table) + close_thread_tables(thd); + DBUG_RETURN(EVEX_GENERAL_ERROR); +} + + +/* + Used to execute ALTER EVENT. Pendant to evex_update_event(). + + SYNOPSIS + db_update_event() + thd THD + sp_name the name of the event to alter + et event's data + + NOTES + sp_name is passed since this is the name of the event to + alter in case of RENAME TO. +*/ + +static int +db_update_event(THD *thd, Event_timed *et, sp_name *new_name) +{ + TABLE *table; + int ret= EVEX_OPEN_TABLE_FAILED; + DBUG_ENTER("db_update_event"); + DBUG_PRINT("enter", ("dbname: %.*s", et->dbname.length, et->dbname.str)); + DBUG_PRINT("enter", ("name: %.*s", et->name.length, et->name.str)); + DBUG_PRINT("enter", ("user: %.*s", et->name.length, et->name.str)); + if (new_name) + DBUG_PRINT("enter", ("rename to: %.*s", new_name->m_name.length, + new_name->m_name.str)); + + if (evex_open_event_table(thd, TL_WRITE, &table)) + { + my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0)); + goto err; + } + + /* first look whether we overwrite */ + if (new_name) + { + if (!sortcmp_lex_string(et->name, new_name->m_name, system_charset_info) && + !sortcmp_lex_string(et->dbname, new_name->m_db, system_charset_info)) + { + my_error(ER_EVENT_SAME_NAME, MYF(0), et->name.str); + goto err; + } + + if (!evex_db_find_event_by_name(thd, new_name->m_db, new_name->m_name, + et->definer, table)) + { + my_error(ER_EVENT_ALREADY_EXISTS, MYF(0), new_name->m_name.str); + goto err; + } + } + /* + ...and then whether there is such an event. don't exchange the blocks + because you will get error 120 from table handler because new_name will + overwrite the key and SE will tell us that it cannot find the already found + row (copied into record[1] later + */ + if (EVEX_KEY_NOT_FOUND == evex_db_find_event_aux(thd, et, table)) + { + my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), et->name.str); + goto err; + } + + store_record(table,record[1]); + + /* Don't update create on row update. */ + table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; + + /* evex_fill_row() calls my_error() in case of error so no need to handle it here */ + if ((ret= evex_fill_row(thd, table, et, true))) + goto err; + + if (new_name) + { + table->field[EVEX_FIELD_DB]-> + store(new_name->m_db.str, new_name->m_db.length, system_charset_info); + table->field[EVEX_FIELD_NAME]-> + store(new_name->m_name.str, new_name->m_name.length, system_charset_info); + } + + if ((ret= table->file->ha_update_row(table->record[1], table->record[0]))) + { + my_error(ER_EVENT_STORE_FAILED, MYF(0), et->name.str, ret); + goto err; + } + + /* close mysql.event or we crash later when loading the event from disk */ + close_thread_tables(thd); + DBUG_RETURN(0); + +err: + if (table) + close_thread_tables(thd); + DBUG_RETURN(EVEX_GENERAL_ERROR); +} + + +/* + Looks for a named event in mysql.event and in case of success returns + an object will data loaded from the table. 
+ + SYNOPSIS + db_find_event() + thd THD + name the name of the event to find + definer who owns the event + ett event's data if event is found + tbl TABLE object to use when not NULL + + NOTES + 1) Use sp_name for look up, return in **ett if found + 2) tbl is not closed at exit +*/ + +static int +db_find_event(THD *thd, sp_name *name, LEX_STRING *definer, Event_timed **ett, + TABLE *tbl, MEM_ROOT *root) +{ + TABLE *table; + int ret; + Event_timed *et=NULL; + DBUG_ENTER("db_find_event"); + DBUG_PRINT("enter", ("name: %*s", name->m_name.length, name->m_name.str)); + + if (!root) + root= &evex_mem_root; + + if (tbl) + table= tbl; + else if (evex_open_event_table(thd, TL_READ, &table)) + { + my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0)); + ret= EVEX_GENERAL_ERROR; + goto done; + } + + if ((ret= evex_db_find_event_by_name(thd, name->m_db, name->m_name, *definer, + table))) + { + my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), name->m_name.str); + goto done; + } + et= new Event_timed; + + /* + 1)The table should not be closed beforehand. ::load_from_row() only loads + and does not compile + + 2)::load_from_row() is silent on error therefore we emit error msg here + */ + if ((ret= et->load_from_row(root, table))) + { + my_error(ER_CANNOT_LOAD_FROM_TABLE, MYF(0)); + goto done; + } + +done: + if (ret && et) + { + delete et; + et= 0; + } + /* don't close the table if we haven't opened it ourselves */ + if (!tbl && table) + close_thread_tables(thd); + *ett= et; + DBUG_RETURN(ret); +} + + +/* + Looks for a named event in mysql.event and then loads it from + the table, compiles it and insert it into the cache. + + SYNOPSIS + evex_load_and_compile_event() + thd THD + spn the name of the event to alter + definer who is the owner + use_lock whether to obtain a lock on LOCK_event_arrays or not + + RETURN VALUE + 0 - OK + < 0 - error (in this case underlying functions call my_error()). +*/ + +static int +evex_load_and_compile_event(THD * thd, sp_name *spn, LEX_STRING definer, + bool use_lock) +{ + int ret= 0; + MEM_ROOT *tmp_mem_root; + Event_timed *ett; + Open_tables_state backup; + + DBUG_ENTER("db_load_and_compile_event"); + DBUG_PRINT("enter", ("name: %*s", spn->m_name.length, spn->m_name.str)); + + tmp_mem_root= thd->mem_root; + thd->mem_root= &evex_mem_root; + + thd->reset_n_backup_open_tables_state(&backup); + /* no need to use my_error() here because db_find_event() has done it */ + ret= db_find_event(thd, spn, &definer, &ett, NULL, NULL); + thd->restore_backup_open_tables_state(&backup); + if (ret) + goto done; + + /* + allocate on evex_mem_root. if you call without evex_mem_root + then sphead will not be cleared! + */ + if ((ret= ett->compile(thd, &evex_mem_root))) + goto done; + + ett->compute_next_execution_time(); + if (use_lock) + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + + evex_queue_insert(&EVEX_EQ_NAME, (EVEX_PTOQEL) ett); + + /* + There is a copy in the array which we don't need. sphead won't be + destroyed. + */ + + if (use_lock) + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + +done: + if (thd->mem_root != tmp_mem_root) + thd->mem_root= tmp_mem_root; + + DBUG_RETURN(ret); +} + + +/* + Removes from queue in memory the event which is identified by the tupple + (db, name). 
+ + SYNOPSIS + evex_remove_from_cache() + + db - db name + name - event name + use_lock - whether to lock the mutex LOCK_event_arrays or not in case it + has been already locked outside + is_drop - if an event is currently being executed then we can also delete + the Event_timed instance, so we alarm the event that it should + drop itself if this parameter is set to TRUE. It's false on + ALTER EVENT. + + RETURNS + 0 OK (always) +*/ + +static int +evex_remove_from_cache(LEX_STRING *db, LEX_STRING *name, bool use_lock, + bool is_drop) +{ + //ToDo : Add definer to the tuple (db, name) to become triple + uint i; + int ret= 0; + + DBUG_ENTER("evex_remove_from_cache"); + /* + It is possible that 2 (or 1) pass(es) won't find the event in memory. + The reason is that DISABLED events are not cached. + */ + + if (use_lock) + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + + for (i= 0; i < evex_queue_num_elements(EVEX_EQ_NAME); ++i) + { + Event_timed *et= evex_queue_element(&EVEX_EQ_NAME, i, Event_timed*); + DBUG_PRINT("info", ("[%s.%s]==[%s.%s]?",db->str,name->str, et->dbname.str, + et->name.str)); + if (!sortcmp_lex_string(*name, et->name, system_charset_info) && + !sortcmp_lex_string(*db, et->dbname, system_charset_info)) + { + if (et->can_spawn_now()) + { + DBUG_PRINT("evex_remove_from_cache", ("not running - free and delete")); + et->free_sp(); + delete et; + } + else + { + DBUG_PRINT("evex_remove_from_cache", + ("running.defer mem free. is_drop=%d", is_drop)); + et->flags|= EVENT_EXEC_NO_MORE; + et->dropped= is_drop; + } + DBUG_PRINT("evex_remove_from_cache", ("delete from queue")); + evex_queue_delete_element(&EVEX_EQ_NAME, i); + /* ok, we have cleaned */ + ret= 0; + goto done; + } + } + +done: + if (use_lock) + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + + DBUG_RETURN(ret); +} + + +/* + The function exported to the world for creating of events. + + SYNOPSIS + evex_create_event() + thd THD + et event's data + create_options Options specified when in the query. We are + interested whether there is IF NOT EXISTS + rows_affected How many rows were affected + + NOTES + - in case there is an event with the same name (db) and + IF NOT EXISTS is specified, an warning is put into the W stack. +*/ + +int +evex_create_event(THD *thd, Event_timed *et, uint create_options, + uint *rows_affected) +{ + int ret = 0; + + DBUG_ENTER("evex_create_event"); + DBUG_PRINT("enter", ("name: %*s options:%d", et->name.length, + et->name.str, create_options)); + + if ((ret = db_create_event(thd, et, + create_options & HA_LEX_CREATE_IF_NOT_EXISTS, + rows_affected))) + goto done; + + VOID(pthread_mutex_lock(&LOCK_evex_running)); + if (evex_is_running && et->status == MYSQL_EVENT_ENABLED) + { + sp_name spn(et->dbname, et->name); + ret= evex_load_and_compile_event(thd, &spn, et->definer, true); + } + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + +done: + /* No need to close the table, it will be closed in sql_parse::do_command */ + + DBUG_RETURN(ret); +} + + +/* + The function exported to the world for alteration of events. + + SYNOPSIS + evex_update_event() + thd THD + et event's data + new_name set in case of RENAME TO. + + NOTES + et contains data about dbname and event name. 
+ new_name is the new name of the event, if not null (this means + that RENAME TO was specified in the query) +*/ + +int +evex_update_event(THD *thd, Event_timed *et, sp_name *new_name, + uint *rows_affected) +{ + int ret; + bool need_second_pass= true; + + DBUG_ENTER("evex_update_event"); + DBUG_PRINT("enter", ("name: %*s", et->name.length, et->name.str)); + + /* + db_update_event() opens & closes the table to prevent + crash later in the code when loading and compiling the new definition. + Also on error conditions my_error() is called so no need to handle here + */ + if ((ret= db_update_event(thd, et, new_name))) + goto done; + + VOID(pthread_mutex_lock(&LOCK_evex_running)); + if (!evex_is_running) + UNLOCK_MUTEX_AND_BAIL_OUT(LOCK_evex_running, done); + + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + evex_remove_from_cache(&et->dbname, &et->name, false, false); + if (et->status == MYSQL_EVENT_ENABLED) + { + if (new_name) + ret= evex_load_and_compile_event(thd, new_name, et->definer, false); + else + { + sp_name spn(et->dbname, et->name); + ret= evex_load_and_compile_event(thd, &spn, et->definer, false); + } + if (ret == EVEX_COMPILE_ERROR) + my_error(ER_EVENT_COMPILE_ERROR, MYF(0)); + } + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + +done: + DBUG_RETURN(ret); +} + + +/* + Drops an event + + SYNOPSIS + db_drop_event() + thd THD + et event's name + drop_if_exists if set and the event not existing => warning onto the stack + rows_affected affected number of rows is returned heres +*/ + +int db_drop_event(THD *thd, Event_timed *et, bool drop_if_exists, + uint *rows_affected) +{ + TABLE *table; + Open_tables_state backup; + int ret; + + DBUG_ENTER("db_drop_event"); + ret= EVEX_OPEN_TABLE_FAILED; + + thd->reset_n_backup_open_tables_state(&backup); + if (evex_open_event_table(thd, TL_WRITE, &table)) + { + my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0)); + goto done; + } + + if (!(ret= evex_db_find_event_aux(thd, et, table))) + { + if ((ret= table->file->ha_delete_row(table->record[0]))) + { + my_error(ER_EVENT_CANNOT_DELETE, MYF(0)); + goto done; + } + } + else if (ret == EVEX_KEY_NOT_FOUND) + { + if (drop_if_exists) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_SP_DOES_NOT_EXIST, ER(ER_SP_DOES_NOT_EXIST), + "Event", et->name.str); + ret= 0; + } else + my_error(ER_EVENT_DOES_NOT_EXIST, MYF(0), et->name.str); + goto done; + } + + +done: + /* + evex_drop_event() is used by Event_timed::drop therefore + we have to close our thread tables. 
+ */ + close_thread_tables(thd); + thd->restore_backup_open_tables_state(&backup); + DBUG_RETURN(ret); +} + + +/* + Drops an event + + SYNOPSIS + evex_drop_event() + thd THD + et event's name + drop_if_exists if set and the event not existing => warning onto the stack + rows_affected affected number of rows is returned heres + +*/ + +int +evex_drop_event(THD *thd, Event_timed *et, bool drop_if_exists, + uint *rows_affected) +{ + int ret= 0; + + DBUG_ENTER("evex_drop_event"); + + + VOID(pthread_mutex_lock(&LOCK_evex_running)); + if (evex_is_running) + ret= evex_remove_from_cache(&et->dbname, &et->name, true, true); + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + + if (ret == 1) + ret= 0; + else if (ret == 0) + ret= db_drop_event(thd, et, drop_if_exists, rows_affected); + else + my_error(ER_UNKNOWN_ERROR, MYF(0)); + + DBUG_RETURN(ret); +} + + +/* + SHOW CREATE EVENT + + SYNOPSIS + evex_show_create_event() + thd THD + spn the name of the event (db, name) + definer the definer of the event + + RETURNS + 0 - OK + 1 - Error during writing to the wire +*/ + +int +evex_show_create_event(THD *thd, sp_name *spn, LEX_STRING definer) +{ + int ret; + Event_timed *et= NULL; + Open_tables_state backup; + + DBUG_ENTER("evex_update_event"); + DBUG_PRINT("enter", ("name: %*s", spn->m_name.length, spn->m_name.str)); + + thd->reset_n_backup_open_tables_state(&backup); + ret= db_find_event(thd, spn, &definer, &et, NULL, thd->mem_root); + thd->restore_backup_open_tables_state(&backup); + + if (!ret && et) + { + Protocol *protocol= thd->protocol; + char show_str_buf[768]; + String show_str(show_str_buf, sizeof(show_str_buf), system_charset_info); + List<Item> field_list; + byte *sql_mode_str; + ulong sql_mode_len=0; + + show_str.length(0); + show_str.set_charset(system_charset_info); + + if (et->get_create_event(thd, &show_str)) + DBUG_RETURN(1); + + field_list.push_back(new Item_empty_string("Event", NAME_LEN)); + + sql_mode_str= + sys_var_thd_sql_mode::symbolic_mode_representation(thd, et->sql_mode, + &sql_mode_len); + + field_list.push_back(new Item_empty_string("sql_mode", sql_mode_len)); + + field_list.push_back(new Item_empty_string("Create Event", + show_str.length())); + if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | + Protocol::SEND_EOF)) + DBUG_RETURN(1); + + protocol->prepare_for_resend(); + protocol->store(et->name.str, et->name.length, system_charset_info); + + protocol->store((char*) sql_mode_str, sql_mode_len, system_charset_info); + + protocol->store(show_str.c_ptr(), show_str.length(), system_charset_info); + ret= protocol->write(); + send_eof(thd); + } + + DBUG_RETURN(ret); +} + + +/* + evex_drop_db_events - Drops all events in the selected database + + thd - Thread + db - ASCIIZ the name of the database + + Returns: + 0 - OK + 1 - Failed to delete a specific row + 2 - Got NULL while reading db name from a row + + Note: + The algo is the following + 1. Go through the in-memory cache, if the scheduler is working + and for every event whose dbname matches the database we drop + check whether is currently in execution: + - Event_timed::can_spawn() returns true -> the event is not + being executed in a child thread. The reason not to use + Event_timed::is_running() is that the latter shows only if + it is being executed, which is 99% of the time in the thread + but there are some initiliazations before and after the + anonymous SP is being called. 
So if we delete in this moment + -=> *boom*, so we have to check whether the thread has been + spawned and can_spawn() is the right method. + - Event_timed::can_spawn() returns false -> being runned ATM + just set the flags so it should drop itself. +*/ + +int +evex_drop_db_events(THD *thd, char *db) +{ + TABLE *table; + READ_RECORD read_record_info; + int ret= 0; + uint i; + LEX_STRING db_lex= {db, strlen(db)}; + + DBUG_ENTER("evex_drop_db_events"); + DBUG_PRINT("info",("dropping events from %s", db)); + + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + + if ((ret= evex_open_event_table(thd, TL_WRITE, &table))) + { + sql_print_error("Table mysql.event is damaged."); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + DBUG_RETURN(SP_OPEN_TABLE_FAILED); + } + + DBUG_PRINT("info",("%d elements in the queue", + evex_queue_num_elements(EVEX_EQ_NAME))); + VOID(pthread_mutex_lock(&LOCK_evex_running)); + if (!evex_is_running) + goto skip_memory; + + for (i= 0; i < evex_queue_num_elements(EVEX_EQ_NAME); ++i) + { + Event_timed *et= evex_queue_element(&EVEX_EQ_NAME, i, Event_timed*); + if (sortcmp_lex_string(et->dbname, db_lex, system_charset_info)) + continue; + + if (et->can_spawn_now_n_lock(thd)) + { + DBUG_PRINT("info",("event %s not running - direct delete", et->name.str)); + if (!(ret= evex_db_find_event_aux(thd, et, table))) + { + DBUG_PRINT("info",("event %s found on disk", et->name.str)); + if ((ret= table->file->ha_delete_row(table->record[0]))) + { + sql_print_error("Error while deleting a row - dropping " + "a database. Skipping the rest."); + my_error(ER_EVENT_DROP_FAILED, MYF(0), et->name.str); + goto end; + } + DBUG_PRINT("info",("deleted event [%s] num [%d]. Time to free mem", + et->name.str, i)); + } + else if (ret == EVEX_KEY_NOT_FOUND) + { + sql_print_error("Expected to find event %s.%s of %s on disk-not there.", + et->dbname.str, et->name.str, et->definer.str); + } + et->free_sp(); + delete et; + et= 0; + /* no need to call et->spawn_unlock because we already cleaned et */ + } + else + { + DBUG_PRINT("info",("event %s is running. setting exec_no_more and dropped", + et->name.str)); + et->flags|= EVENT_EXEC_NO_MORE; + et->dropped= TRUE; + } + DBUG_PRINT("info",("%d elements in the queue", + evex_queue_num_elements(EVEX_EQ_NAME))); + evex_queue_delete_element(&EVEX_EQ_NAME, i);// 0 is top + DBUG_PRINT("info",("%d elements in the queue", + evex_queue_num_elements(EVEX_EQ_NAME))); + /* + decrease so we start at the same position, there will be + less elements in the queue, it will still be ordered so on + next iteration it will be again i the current element or if + no more we finish. + */ + --i; + } + +skip_memory: + /* + The reasoning behind having two loops is the following: + If there was only one loop, the table-scan, then for every element which + matches, the queue in memory has to be searched to remove the element. + While if we go first over the queue and remove what's in there we have only + one pass over it and after finishing it, moving to table-scan for the disabled + events. This needs quite less time and means quite less locking on + LOCK_event_arrays. 
+ */ + DBUG_PRINT("info",("Mem-cache checked, now going to db for disabled events")); + /* only enabled events are in memory, so we go now and delete the rest */ + init_read_record(&read_record_info, thd, table ,NULL,1,0); + while (!(read_record_info.read_record(&read_record_info)) && !ret) + { + char *et_db; + + if ((et_db= get_field(thd->mem_root, table->field[EVEX_FIELD_DB])) == NULL) + { + ret= 2; + break; + } + + LEX_STRING et_db_lex= {et_db, strlen(et_db)}; + if (!sortcmp_lex_string(et_db_lex, db_lex, system_charset_info)) + { + Event_timed ett; + char *ptr; + + if ((ptr= get_field(thd->mem_root, table->field[EVEX_FIELD_STATUS])) + == NullS) + { + sql_print_error("Error while loading from mysql.event. " + "Table probably corrupted"); + goto end; + } + /* + When not running nothing is in memory so we have to clean + everything. + We don't delete EVENT_ENABLED events when the scheduler is running + because maybe this is an event which we asked to drop itself when + it is finished and it hasn't finished yet, so we don't touch it. + It will drop itself. The not running ENABLED events has been already + deleted from ha_delete_row() above in the loop over the QUEUE + (in case the executor is running). + 'D' stands for DISABLED, 'E' for ENABLED - it's an enum + */ + if ((evex_is_running && ptr[0] == 'D') || !evex_is_running) + { + DBUG_PRINT("info", ("Dropping %s.%s", et_db, ett.name.str)); + if ((ret= table->file->ha_delete_row(table->record[0]))) + { + my_error(ER_EVENT_DROP_FAILED, MYF(0), ett.name.str); + goto end; + } + } + } + } + DBUG_PRINT("info",("Disk checked for disabled events. Finishing.")); + +end: + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + end_read_record(&read_record_info); + + thd->version--; /* Force close to free memory */ + + close_thread_tables(thd); + + DBUG_RETURN(ret); +} diff --git a/sql/event.h b/sql/event.h new file mode 100644 index 00000000000..d070f93c575 --- /dev/null +++ b/sql/event.h @@ -0,0 +1,330 @@ +/* Copyright (C) 2004-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _EVENT_H_ +#define _EVENT_H_ + +#include "sp.h" +#include "sp_head.h" + +#define EVEX_OK SP_OK +#define EVEX_KEY_NOT_FOUND SP_KEY_NOT_FOUND +#define EVEX_OPEN_TABLE_FAILED SP_OPEN_TABLE_FAILED +#define EVEX_WRITE_ROW_FAILED SP_WRITE_ROW_FAILED +#define EVEX_DELETE_ROW_FAILED SP_DELETE_ROW_FAILED +#define EVEX_GET_FIELD_FAILED SP_GET_FIELD_FAILED +#define EVEX_PARSE_ERROR SP_PARSE_ERROR +#define EVEX_INTERNAL_ERROR SP_INTERNAL_ERROR +#define EVEX_NO_DB_ERROR SP_NO_DB_ERROR +#define EVEX_COMPILE_ERROR -19 +#define EVEX_GENERAL_ERROR -20 +#define EVEX_BAD_IDENTIFIER SP_BAD_IDENTIFIER +#define EVEX_BODY_TOO_LONG SP_BODY_TOO_LONG +#define EVEX_BAD_PARAMS -21 +#define EVEX_NOT_RUNNING -22 +#define EVEX_MICROSECOND_UNSUP -23 + +#define EVENT_EXEC_NO_MORE (1L << 0) +#define EVENT_NOT_USED (1L << 1) + + +extern ulong opt_event_executor; + +enum enum_event_on_completion +{ + MYSQL_EVENT_ON_COMPLETION_DROP = 1, + MYSQL_EVENT_ON_COMPLETION_PRESERVE +}; + +enum enum_event_status +{ + MYSQL_EVENT_ENABLED = 1, + MYSQL_EVENT_DISABLED +}; + +enum evex_table_field +{ + EVEX_FIELD_DB = 0, + EVEX_FIELD_NAME, + EVEX_FIELD_BODY, + EVEX_FIELD_DEFINER, + EVEX_FIELD_EXECUTE_AT, + EVEX_FIELD_INTERVAL_EXPR, + EVEX_FIELD_TRANSIENT_INTERVAL, + EVEX_FIELD_CREATED, + EVEX_FIELD_MODIFIED, + EVEX_FIELD_LAST_EXECUTED, + EVEX_FIELD_STARTS, + EVEX_FIELD_ENDS, + EVEX_FIELD_STATUS, + EVEX_FIELD_ON_COMPLETION, + EVEX_FIELD_SQL_MODE, + EVEX_FIELD_COMMENT, + EVEX_FIELD_COUNT /* a cool trick to count the number of fields :) */ +} ; + +class Event_timed +{ + Event_timed(const Event_timed &); /* Prevent use of these */ + void operator=(Event_timed &); + my_bool in_spawned_thread; + ulong locked_by_thread_id; + my_bool running; + pthread_mutex_t LOCK_running; + + bool status_changed; + bool last_executed_changed; + +public: + TIME last_executed; + + LEX_STRING dbname; + LEX_STRING name; + LEX_STRING body; + + LEX_STRING definer_user; + LEX_STRING definer_host; + LEX_STRING definer;// combination of user and host + + LEX_STRING comment; + TIME starts; + TIME ends; + TIME execute_at; + my_bool starts_null; + my_bool ends_null; + my_bool execute_at_null; + + longlong expression; + interval_type interval; + + ulonglong created; + ulonglong modified; + enum enum_event_on_completion on_completion; + enum enum_event_status status; + sp_head *sphead; + ulong sql_mode; + const uchar *body_begin; + + bool dropped; + bool free_sphead_on_delete; + uint flags;//all kind of purposes + + Event_timed():in_spawned_thread(0),locked_by_thread_id(0), + running(0), status_changed(false), + last_executed_changed(false), expression(0), created(0), + modified(0), on_completion(MYSQL_EVENT_ON_COMPLETION_DROP), + status(MYSQL_EVENT_ENABLED), sphead(0), sql_mode(0), + body_begin(0), dropped(false), + free_sphead_on_delete(true), flags(0) + + { + pthread_mutex_init(&this->LOCK_running, MY_MUTEX_INIT_FAST); + init(); + } + + ~Event_timed() + { + pthread_mutex_destroy(&this->LOCK_running); + if (free_sphead_on_delete) + free_sp(); + } + + + void + init(); + + int + init_definer(THD *thd); + + int + init_execute_at(THD *thd, Item *expr); + + int + init_interval(THD *thd, Item *expr, interval_type new_interval); + + void + init_name(THD *thd, sp_name *spn); + + int + init_starts(THD *thd, Item *starts); + + int + init_ends(THD *thd, Item *ends); + + 
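A side note on the EVEX_FIELD_COUNT enumerator defined above: because enum values are consecutive from 0, a trailing sentinel always equals the number of real entries, which is what the "cool trick" comment refers to. A self-contained illustration with hypothetical field names:

#include <cstdio>

enum demo_table_field
{
  FIELD_DB= 0,
  FIELD_NAME,
  FIELD_BODY,
  FIELD_COUNT  /* one past the last field, so it equals the field count */
};

int main()
{
  /* The sentinel tracks the enum automatically as fields are added. */
  printf("the table has %d fields\n", FIELD_COUNT); /* prints 3 */
  return 0;
}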
void + init_body(THD *thd); + + void + init_comment(THD *thd, LEX_STRING *set_comment); + + int + load_from_row(MEM_ROOT *mem_root, TABLE *table); + + bool + compute_next_execution_time(); + + void + mark_last_executed(THD *thd); + + int + drop(THD *thd); + + bool + update_fields(THD *thd); + + int + get_create_event(THD *thd, String *buf); + + int + execute(THD *thd, MEM_ROOT *mem_root= NULL); + + int + compile(THD *thd, MEM_ROOT *mem_root= NULL); + + my_bool + is_running() + { + my_bool ret; + + VOID(pthread_mutex_lock(&this->LOCK_running)); + ret= running; + VOID(pthread_mutex_unlock(&this->LOCK_running)); + + return ret; + } + + /* + Checks whether the object is being used in a spawned thread. + This method is for very basic checking. Use ::can_spawn_now_n_lock() + for most of the cases. + */ + + my_bool + can_spawn_now() + { + my_bool ret; + VOID(pthread_mutex_lock(&this->LOCK_running)); + ret= !in_spawned_thread; + VOID(pthread_mutex_unlock(&this->LOCK_running)); + return ret; + } + + /* + Checks whether this thread can lock the object for modification -> + preventing being spawned for execution, and locks if possible. + use ::can_spawn_now() only for basic checking because a race + condition may occur between the check and eventual modification (deletion) + of the object. + */ + + my_bool + can_spawn_now_n_lock(THD *thd); + + int + spawn_unlock(THD *thd); + + int + spawn_now(void * (*thread_func)(void*)); + + void + spawn_thread_finish(THD *thd); + + void + free_sp() + { + delete sphead; + sphead= 0; + } +protected: + bool + change_security_context(THD *thd, Security_context *s_ctx, + Security_context **backup); + + void + restore_security_context(THD *thd, Security_context *backup); +}; + + +int +evex_create_event(THD *thd, Event_timed *et, uint create_options, + uint *rows_affected); + +int +evex_update_event(THD *thd, Event_timed *et, sp_name *new_name, + uint *rows_affected); + +int +evex_drop_event(THD *thd, Event_timed *et, bool drop_if_exists, + uint *rows_affected); + +int +evex_open_event_table(THD *thd, enum thr_lock_type lock_type, TABLE **table); + +int +evex_show_create_event(THD *thd, sp_name *spn, LEX_STRING definer); + +int sortcmp_lex_string(LEX_STRING s, LEX_STRING t, CHARSET_INFO *cs); + +int +event_reconstruct_interval_expression(String *buf, + interval_type interval, + longlong expression); + +int +evex_drop_db_events(THD *thd, char *db); + + +int +init_events(); + +void +shutdown_events(); + + +// auxiliary +int +event_timed_compare(Event_timed **a, Event_timed **b); + + + +/* +CREATE TABLE event ( + db char(64) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL default '', + name char(64) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL default '', + body longblob NOT NULL, + definer char(77) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL default '', + execute_at DATETIME default NULL, + interval_value int(11) default NULL, + interval_field ENUM('YEAR','QUARTER','MONTH','DAY','HOUR','MINUTE','WEEK', + 'SECOND','MICROSECOND', 'YEAR_MONTH','DAY_HOUR', + 'DAY_MINUTE','DAY_SECOND', + 'HOUR_MINUTE','HOUR_SECOND', + 'MINUTE_SECOND','DAY_MICROSECOND', + 'HOUR_MICROSECOND','MINUTE_MICROSECOND', + 'SECOND_MICROSECOND') default NULL, + created TIMESTAMP NOT NULL, + modified TIMESTAMP NOT NULL, + last_executed DATETIME default NULL, + starts DATETIME default NULL, + ends DATETIME default NULL, + status ENUM('ENABLED','DISABLED') NOT NULL default 'ENABLED', + on_completion ENUM('DROP','PRESERVE') NOT NULL default 'DROP', + comment varchar(64) CHARACTER SET utf8 COLLATE utf8_bin NOT 
NULL default '', + PRIMARY KEY (definer,db,name) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT 'Events'; +*/ + +#endif /* _EVENT_H_ */ diff --git a/sql/event_executor.cc b/sql/event_executor.cc new file mode 100644 index 00000000000..92acf154c75 --- /dev/null +++ b/sql/event_executor.cc @@ -0,0 +1,987 @@ +/* Copyright (C) 2004-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "event_priv.h" +#include "event.h" +#include "sp.h" + +#define WAIT_STATUS_READY 0 +#define WAIT_STATUS_EMPTY_QUEUE 1 +#define WAIT_STATUS_NEW_TOP_EVENT 2 +#define WAIT_STATUS_STOP_EXECUTOR 3 + + +/* + Make this define DBUG_FAULTY_THR to be able to put breakpoints inside + code used by the scheduler's thread(s). In this case user connections + are not possible because the scheduler thread code is ran inside the + main thread (no spawning takes place. If you want to debug client + connection then start with --one-thread and make the define + DBUG_FAULTY_THR ! +*/ +#define DBUG_FAULTY_THR2 + +extern ulong thread_created; +extern const char *my_localhost; +extern pthread_attr_t connection_attrib; + +pthread_mutex_t LOCK_event_arrays, // mutex for when working with the queue + LOCK_workers_count, // mutex for when inc/dec uint workers_count + LOCK_evex_running; // mutes for managing bool evex_is_running + + +bool evex_is_running= false; + +ulonglong evex_main_thread_id= 0; +ulong opt_event_executor; +my_bool event_executor_running_global_var; +static my_bool evex_mutexes_initted= FALSE; +static uint workers_count; + +static int +evex_load_events_from_db(THD *thd); + +bool +evex_print_warnings(THD *thd, Event_timed *et); + +/* + TODO Andrey: Check for command line option whether to start + the main thread or not. +*/ + +pthread_handler_t +event_executor_worker(void *arg); + +pthread_handler_t +event_executor_main(void *arg); + + +/* + Returns the seconds difference of 2 TIME structs + + SYNOPSIS + evex_time_diff() + a - TIME struct 1 + b - TIME struct 2 + + Returns: + the seconds difference +*/ + +static int +evex_time_diff(TIME *a, TIME *b) +{ + return sec_since_epoch_TIME(a) - sec_since_epoch_TIME(b); +} + + +/* + Inits the mutexes used by the scheduler module + + SYNOPSIS + evex_init_mutexes() + + NOTES + The mutexes are : + LOCK_event_arrays + LOCK_workers_count + LOCK_evex_running +*/ + +static void +evex_init_mutexes() +{ + if (evex_mutexes_initted) + return; + + evex_mutexes_initted= TRUE; + pthread_mutex_init(&LOCK_event_arrays, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_workers_count, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_evex_running, MY_MUTEX_INIT_FAST); + + event_executor_running_global_var= opt_event_executor; +} + +extern TABLE_FIELD_W_TYPE mysql_db_table_fields[]; +extern time_t mysql_db_table_last_check; + +/* + Opens mysql.db and mysql.user and checks whether + 1. mysql.db has column Event_priv at column 20 (0 based); + 2. 
mysql.user has column Event_priv at column 29 (0 based);
+
+ Synopsis
+ evex_check_system_tables()
+*/
+
+void
+evex_check_system_tables()
+{
+ THD *thd= current_thd;
+ TABLE_LIST tables;
+ Open_tables_state backup;
+
+ /* thd is 0x0 during boot of the server. Later it's !=0x0 */
+ if (!thd)
+ return;
+
+ thd->reset_n_backup_open_tables_state(&backup);
+
+ bzero((char*) &tables, sizeof(tables));
+ tables.db= (char*) "mysql";
+ tables.table_name= tables.alias= (char*) "db";
+ tables.lock_type= TL_READ;
+
+ if (simple_open_n_lock_tables(thd, &tables))
+ sql_print_error("Cannot open mysql.db");
+ else
+ {
+ table_check_intact(tables.table, MYSQL_DB_FIELD_COUNT, mysql_db_table_fields,
+ &mysql_db_table_last_check,ER_CANNOT_LOAD_FROM_TABLE);
+ close_thread_tables(thd);
+ }
+
+ bzero((char*) &tables, sizeof(tables));
+ tables.db= (char*) "mysql";
+ tables.table_name= tables.alias= (char*) "user";
+ tables.lock_type= TL_READ;
+
+ if (simple_open_n_lock_tables(thd, &tables))
+ sql_print_error("Cannot open mysql.user");
+ else
+ {
+ if (tables.table->s->fields < 29 ||
+ strncmp(tables.table->field[29]->field_name,
+ STRING_WITH_LEN("Event_priv")))
+ sql_print_error("mysql.user has no `Event_priv` column at position 29");
+
+ close_thread_tables(thd);
+ }
+
+ thd->restore_backup_open_tables_state(&backup);
+}
+
+
+/*
+ Inits the scheduler. Called on server start and every time the scheduler
+ is started by switching the event_scheduler global variable to TRUE.
+
+ SYNOPSIS
+ init_events()
+
+ NOTES
+ Inits the mutexes used by the scheduler. Done at server start.
+*/
+
+int
+init_events()
+{
+ pthread_t th;
+ DBUG_ENTER("init_events");
+
+ DBUG_PRINT("info",("Starting events main thread"));
+
+ evex_check_system_tables();
+
+ evex_init_mutexes();
+
+ VOID(pthread_mutex_lock(&LOCK_evex_running));
+ evex_is_running= false;
+ VOID(pthread_mutex_unlock(&LOCK_evex_running));
+
+ if (event_executor_running_global_var)
+ {
+#ifndef DBUG_FAULTY_THR
+ /* TODO Andrey: Change the error code returned! */
+ if (pthread_create(&th, &connection_attrib, event_executor_main,(void*)NULL))
+ DBUG_RETURN(ER_SLAVE_THREAD);
+#else
+ event_executor_main(NULL);
+#endif
+ }
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Cleans up scheduler memory. Called on server shutdown.
+
+ SYNOPSIS
+ shutdown_events()
+
+ NOTES
+ Destroys the mutexes.
+*/
+
+void
+shutdown_events()
+{
+ DBUG_ENTER("shutdown_events");
+
+ if (evex_mutexes_initted)
+ {
+ evex_mutexes_initted= FALSE;
+ VOID(pthread_mutex_lock(&LOCK_evex_running));
+ VOID(pthread_mutex_unlock(&LOCK_evex_running));
+
+ pthread_mutex_destroy(&LOCK_event_arrays);
+ pthread_mutex_destroy(&LOCK_workers_count);
+ pthread_mutex_destroy(&LOCK_evex_running);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Inits a scheduler thread handler, both for the main and for a worker thread
+
+ SYNOPSIS
+ init_event_thread()
+ thd - the THD of the thread. Has to be allocated by the caller.
+
+ NOTES
+ 1. The host of the thread is my_localhost
+ 2. thd->net is initialized with NULL - no communication.
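init_events above hands event_executor_main to pthread_create and never joins it; in the patch the thread later detaches itself. The bare-bones shape of starting such a fire-and-forget service thread, sketched with a detached attribute instead of self-detaching and with no MySQL types (all names below are illustrative):

#include <pthread.h>
#include <cstdio>

static void *scheduler_main(void *arg)
{
  (void) arg;
  /* ... load events, then loop until asked to stop ... */
  return NULL;
}

int start_scheduler(void)
{
  pthread_t th;
  pthread_attr_t attr;
  int err;

  pthread_attr_init(&attr);
  /* Detached: nobody will pthread_join() the scheduler; it cleans up
     after itself when scheduler_main returns. */
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

  err= pthread_create(&th, &attr, scheduler_main, NULL);
  pthread_attr_destroy(&attr);
  if (err != 0)
    fprintf(stderr, "cannot start scheduler thread: %d\n", err);
  return err;
}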
+ + Returns + 0 - OK + -1 - Error +*/ + +static int +init_event_thread(THD* thd) +{ + DBUG_ENTER("init_event_thread"); + thd->client_capabilities= 0; + thd->security_ctx->master_access= 0; + thd->security_ctx->db_access= 0; + thd->security_ctx->host_or_ip= (char*)my_localhost; + my_net_init(&thd->net, 0); + thd->net.read_timeout = slave_net_timeout; + thd->slave_thread= 0; + thd->options|= OPTION_AUTO_IS_NULL; + thd->client_capabilities= CLIENT_LOCAL_FILES; + thd->real_id=pthread_self(); + VOID(pthread_mutex_lock(&LOCK_thread_count)); + thd->thread_id= thread_id++; + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + + if (init_thr_lock() || thd->store_globals()) + { + thd->cleanup(); + delete thd; + DBUG_RETURN(-1); + } + +#if !defined(__WIN__) && !defined(OS2) && !defined(__NETWARE__) + sigset_t set; + VOID(sigemptyset(&set)); // Get mask in use + VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals)); +#endif + + thd->proc_info= "Initialized"; + thd->version= refresh_version; + thd->set_time(); + DBUG_RETURN(0); +} + + +/* + This function waits till the time next event in the queue should be + executed. + + Returns + WAIT_STATUS_READY There is an event to be executed right now + WAIT_STATUS_EMPTY_QUEUE No events or the last event was dropped. + WAIT_STATUS_NEW_TOP_EVENT New event has entered the queue and scheduled + on top. Restart ticking. + WAIT_STATUS_STOP_EXECUTOR The thread was killed or SET global event_scheduler=0; +*/ + +static int +executor_wait_till_next_event_exec(THD *thd) +{ + Event_timed *et; + TIME time_now; + int t2sleep; + + DBUG_ENTER("executor_wait_till_next_event_exec"); + /* + now let's see how much time to sleep, we know there is at least 1 + element in the queue. + */ + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + if (!evex_queue_num_elements(EVEX_EQ_NAME)) + { + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + DBUG_RETURN(WAIT_STATUS_EMPTY_QUEUE); + } + et= evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*); + DBUG_ASSERT(et); + if (et->status == MYSQL_EVENT_DISABLED) + { + DBUG_PRINT("evex main thread",("Now it is disabled-exec no more")); + if (et->dropped) + et->drop(thd); + delete et; + evex_queue_delete_element(&EVEX_EQ_NAME, 0);// 0 is top, internally 1 + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + sql_print_information("Event found disabled, dropping."); + DBUG_RETURN(1); + } + + DBUG_PRINT("evex main thread",("computing time to sleep till next exec")); + /* set the internal clock of thd */ + thd->end_time(); + my_tz_UTC->gmt_sec_to_TIME(&time_now, thd->query_start()); + t2sleep= evex_time_diff(&et->execute_at, &time_now); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + + DBUG_PRINT("evex main thread",("unlocked LOCK_event_arrays")); + if (t2sleep > 0) + { + ulonglong modified= et->modified; + /* + We sleep t2sleep seconds but we check every second whether this thread + has been killed, or there is a new candidate + */ + while (t2sleep-- && !thd->killed && event_executor_running_global_var && + evex_queue_num_elements(EVEX_EQ_NAME) && + (evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*) == et && + evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*)->modified == + modified)) + { + DBUG_PRINT("evex main thread",("will sleep a bit more.")); + my_sleep(1000000); + } + DBUG_PRINT("info",("saved_modified=%llu current=%llu", modified, + evex_queue_num_elements(EVEX_EQ_NAME)? 
+ evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*)->modified:
+ (ulonglong)~0));
+ }
+
+ int ret= WAIT_STATUS_READY;
+ if (!evex_queue_num_elements(EVEX_EQ_NAME))
+ ret= WAIT_STATUS_EMPTY_QUEUE;
+ else if (evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*) != et)
+ ret= WAIT_STATUS_NEW_TOP_EVENT;
+ if (thd->killed && event_executor_running_global_var)
+ ret= WAIT_STATUS_STOP_EXECUTOR;
+
+ DBUG_RETURN(ret);
+}
+
+
+/*
+ The main scheduler thread. Inits the priority queue on start and
+ destroys it on thread shutdown. Forks a child thread for every event
+ execution. Sleeps between forks and does not do a busy wait.
+
+ SYNOPSIS
+ event_executor_main()
+ arg unused
+
+ NOTES
+ 1. The host of the thread is my_localhost
+ 2. thd->net is initialized with NULL - no communication.
+
+*/
+
+pthread_handler_t
+event_executor_main(void *arg)
+{
+ THD *thd; /* needs to be first for thread_stack */
+ uint i=0, j=0;
+ my_ulonglong cnt= 0;
+
+ DBUG_ENTER("event_executor_main");
+ DBUG_PRINT("event_executor_main", ("EVEX thread started"));
+
+
+ /* init memory root */
+ init_alloc_root(&evex_mem_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC);
+
+ /* we need to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff */
+ my_thread_init();
+
+ if (sizeof(my_time_t) != sizeof(time_t))
+ {
+ sql_print_error("SCHEDULER: sizeof(my_time_t) != sizeof(time_t). "
+ "The scheduler will not work correctly. Stopping.");
+ DBUG_ASSERT(0);
+ goto err_no_thd;
+ }
+
+ /* note that the constructor of THD uses DBUG_ ! */
+ if (!(thd = new THD))
+ {
+ sql_print_error("SCHEDULER: Cannot create THD for the main thread.");
+ goto err_no_thd;
+ }
+ thd->thread_stack = (char*)&thd; // remember where our stack is
+
+ pthread_detach_this_thread();
+
+ if (init_event_thread(thd))
+ goto finish;
+
+ /*
+ make this thread visible: it has no vio, so SHOW PROCESSLIST won't
+ see it unless it's marked as a system thread
+ */
+ thd->system_thread= 1;
+
+ VOID(pthread_mutex_lock(&LOCK_thread_count));
+ threads.append(thd);
+ thread_count++;
+ thread_running++;
+ VOID(pthread_mutex_unlock(&LOCK_thread_count));
+
+ DBUG_PRINT("EVEX main thread", ("Initing events_queue"));
+
+ /*
+ Manifest early that we are running, so that we don't crash because
+ of access to non-initialized memory structures.
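The one-second slices in executor_wait_till_next_event_exec above let the thread notice a kill flag or a changed queue top without a condition variable. The same idea in isolation, a sketch with std::atomic standing in for the server-side checks:

#include <atomic>
#include <chrono>
#include <thread>

/* Sleep up to `seconds`, but wake early once `cancel` is raised.
   Coarse (1s granularity) and lock-free; good enough when the
   deadline itself is measured in whole seconds. */
bool interruptible_sleep(int seconds, const std::atomic<bool> &cancel)
{
  while (seconds-- > 0 && !cancel.load())
    std::this_thread::sleep_for(std::chrono::seconds(1));
  return !cancel.load();   /* true: slept the full time */
}

The trade-off is up to a second of extra latency on wakeup, which is acceptable here because events are scheduled with one-second granularity anyway.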
+ */ + VOID(pthread_mutex_lock(&LOCK_evex_running)); + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + evex_queue_init(&EVEX_EQ_NAME); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + evex_is_running= true; + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + + thd->security_ctx->user= my_strdup("event_scheduler", MYF(0)); + + if (evex_load_events_from_db(thd)) + goto finish; + + evex_main_thread_id= thd->thread_id; + + sql_print_information("SCHEDULER: Main thread started"); + while (!thd->killed) + { + TIME time_now; + Event_timed *et; + + cnt++; + DBUG_PRINT("info", ("EVEX External Loop %d thd->k", cnt)); + + thd->proc_info = "Sleeping"; + if (!event_executor_running_global_var) + { + sql_print_information("SCHEDULER: Asked to stop."); + break; + } + + if (!evex_queue_num_elements(EVEX_EQ_NAME)) + { + my_sleep(1000000);// sleep 1s + continue; + } + +restart_ticking: + switch (executor_wait_till_next_event_exec(thd)) { + case WAIT_STATUS_READY: // time to execute the event on top + DBUG_PRINT("evex main thread",("time to execute an event")); + break; + case WAIT_STATUS_EMPTY_QUEUE: // no more events + DBUG_PRINT("evex main thread",("no more events")); + continue; + break; + case WAIT_STATUS_NEW_TOP_EVENT: // new event on top in the queue + DBUG_PRINT("evex main thread",("restart ticking")); + goto restart_ticking; + case WAIT_STATUS_STOP_EXECUTOR: + sql_print_information("SCHEDULER: Asked to stop."); + goto finish; + break; + default: + DBUG_ASSERT(0); + } + + + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + thd->end_time(); + my_tz_UTC->gmt_sec_to_TIME(&time_now, thd->query_start()); + + if (!evex_queue_num_elements(EVEX_EQ_NAME)) + { + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + DBUG_PRINT("evex main thread",("empty queue")); + continue; + } + et= evex_queue_first_element(&EVEX_EQ_NAME, Event_timed*); + DBUG_PRINT("evex main thread",("got event from the queue")); + + if (!et->execute_at_null && my_time_compare(&time_now,&et->execute_at) == -1) + { + DBUG_PRINT("evex main thread",("still not the time for execution")); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + continue; + } + + DBUG_PRINT("evex main thread",("it's right time")); + if (et->status == MYSQL_EVENT_ENABLED) + { + int fork_ret_code; + + DBUG_PRINT("evex main thread", ("[%10s] this exec at [%llu]", et->name.str, + TIME_to_ulonglong_datetime(&et->execute_at))); + et->mark_last_executed(thd); + if (et->compute_next_execution_time()) + { + sql_print_error("SCHEDULER: Error while computing time of %s.%s . " + "Disabling after execution.", + et->dbname.str, et->name.str); + et->status= MYSQL_EVENT_DISABLED; + } + DBUG_PRINT("evex main thread", ("[%10s] next exec at [%llu]", et->name.str, + TIME_to_ulonglong_datetime(&et->execute_at))); + + et->update_fields(thd); +#ifndef DBUG_FAULTY_THR + thread_safe_increment(workers_count, &LOCK_workers_count); + switch ((fork_ret_code= et->spawn_now(event_executor_worker))) { + case EVENT_EXEC_CANT_FORK: + thread_safe_decrement(workers_count, &LOCK_workers_count); + sql_print_error("SCHEDULER: Problem while trying to create a thread"); + UNLOCK_MUTEX_AND_BAIL_OUT(LOCK_event_arrays, finish); + case EVENT_EXEC_ALREADY_EXEC: + thread_safe_decrement(workers_count, &LOCK_workers_count); + sql_print_information("SCHEDULER: %s.%s in execution. 
Skip this time.", + et->dbname.str, et->name.str); + break; + default: + DBUG_ASSERT(!fork_ret_code); + if (fork_ret_code) + thread_safe_decrement(workers_count, &LOCK_workers_count); + break; + } +#else + event_executor_worker((void *) et); +#endif + /* + 1. For one-time event : year is > 0 and expression is 0 + 2. For recurring, expression is != -=> check execute_at_null in this case + */ + if ((et->execute_at.year && !et->expression) || et->execute_at_null) + et->flags |= EVENT_EXEC_NO_MORE; + + if ((et->flags & EVENT_EXEC_NO_MORE) || et->status == MYSQL_EVENT_DISABLED) + evex_queue_delete_element(&EVEX_EQ_NAME, 0);// 0 is top, internally 1 + else + evex_queue_first_updated(&EVEX_EQ_NAME); + } + DBUG_PRINT("evex main thread",("unlocking")); + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + }/* while */ +finish: + + /* First manifest that this thread does not work and then destroy */ + VOID(pthread_mutex_lock(&LOCK_evex_running)); + evex_is_running= false; + evex_main_thread_id= 0; + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + + + /* + TODO: A better will be with a conditional variable + */ + /* + Read workers_count without lock, no need for locking. + In the worst case we have to wait 1sec more. + */ + sql_print_information("SCHEDULER: Stopping. Waiting for worker threads to finish."); + while (1) + { + VOID(pthread_mutex_lock(&LOCK_workers_count)); + if (!workers_count) + { + VOID(pthread_mutex_unlock(&LOCK_workers_count)); + break; + } + VOID(pthread_mutex_unlock(&LOCK_workers_count)); + my_sleep(1000000);// 1s + } + + /* + First we free all objects ... + Lock because a DROP DATABASE could be running in parallel and it locks on these + */ + sql_print_information("SCHEDULER: Emptying the queue."); + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + for (i= 0; i < evex_queue_num_elements(EVEX_EQ_NAME); ++i) + { + Event_timed *et= evex_queue_element(&EVEX_EQ_NAME, i, Event_timed*); + et->free_sp(); + delete et; + } + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + /* ... then we can thrash the whole queue at once */ + evex_queue_destroy(&EVEX_EQ_NAME); + + thd->proc_info = "Clearing"; + DBUG_ASSERT(thd->net.buff != 0); + net_end(&thd->net); // destructor will not free it, because we are weird + THD_CHECK_SENTRY(thd); + + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + thread_running--; +#ifndef DBUG_FAULTY_THR + THD_CHECK_SENTRY(thd); + delete thd; +#endif + pthread_mutex_unlock(&LOCK_thread_count); + + +err_no_thd: + VOID(pthread_mutex_lock(&LOCK_evex_running)); + evex_is_running= false; + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + + free_root(&evex_mem_root, MYF(0)); + sql_print_information("SCHEDULER: Stopped."); + +#ifndef DBUG_FAULTY_THR + my_thread_end(); + pthread_exit(0); +#endif + DBUG_RETURN(0); // Can't return anything here +} + + +/* + Function that executes an event in a child thread. Setups the + environment for the event execution and cleans after that. + + SYNOPSIS + event_executor_worker() + arg The Event_timed object to be processed +*/ + +pthread_handler_t +event_executor_worker(void *event_void) +{ + THD *thd; /* needs to be first for thread_stack */ + Event_timed *event = (Event_timed *) event_void; + MEM_ROOT worker_mem_root; + + DBUG_ENTER("event_executor_worker"); + + init_alloc_root(&worker_mem_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC); + +#ifndef DBUG_FAULTY_THR + my_thread_init(); + + if (!(thd = new THD)) /* note that contructor of THD uses DBUG_ ! 
*/ + { + sql_print_error("SCHEDULER: Cannot create a THD structure in an worker."); + goto err_no_thd; + } + thd->thread_stack = (char*)&thd; // remember where our stack is + thd->mem_root= &worker_mem_root; + + pthread_detach_this_thread(); + + if (init_event_thread(thd)) + goto err; + + thd->init_for_queries(); + + /* make this thread visible it has no vio -> show processlist needs this flag */ + thd->system_thread= 1; + + VOID(pthread_mutex_lock(&LOCK_thread_count)); + threads.append(thd); + thread_count++; + thread_running++; + VOID(pthread_mutex_unlock(&LOCK_thread_count)); +#else + thd= current_thd; +#endif + + thd->enable_slow_log= TRUE; + { + int ret; + sql_print_information("SCHEDULER: Executing event %s.%s of %s [EXPR:%d]", + event->dbname.str, event->name.str, + event->definer.str, (int) event->expression); + + ret= event->execute(thd, &worker_mem_root); + + evex_print_warnings(thd, event); + sql_print_information("SCHEDULER: Executed event %s.%s of %s [EXPR:%d]. " + "RetCode=%d", event->dbname.str, event->name.str, + event->definer.str, (int) event->expression, ret); + if (ret == EVEX_COMPILE_ERROR) + sql_print_information("SCHEDULER: COMPILE ERROR for event %s.%s of", + event->dbname.str, event->name.str, + event->definer.str); + else if (ret == EVEX_MICROSECOND_UNSUP) + sql_print_information("SCHEDULER: MICROSECOND is not supported"); + } + event->spawn_thread_finish(thd); + + +err: + VOID(pthread_mutex_lock(&LOCK_thread_count)); +#ifndef DBUG_FAULTY_THR + thread_count--; + thread_running--; + /* + Some extra safety, which should not been needed (normally, event deletion + should already have done these assignments (each event which sets these + variables is supposed to set them to 0 before terminating)). + */ + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + + thd->proc_info = "Clearing"; + DBUG_ASSERT(thd->net.buff != 0); + net_end(&thd->net); // destructor will not free it, because we are weird + THD_CHECK_SENTRY(thd); + + VOID(pthread_mutex_lock(&LOCK_thread_count)); + THD_CHECK_SENTRY(thd); + delete thd; +#endif + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + +err_no_thd: + + free_root(&worker_mem_root, MYF(0)); + thread_safe_decrement(workers_count, &LOCK_workers_count); + +#ifndef DBUG_FAULTY_THR + my_thread_end(); + pthread_exit(0); +#endif + DBUG_RETURN(0); // Can't return anything here +} + + +/* + Loads all ENABLED events from mysql.event into the prioritized + queue. Called during scheduler main thread initialization. Compiles + the events. Creates Event_timed instances for every ENABLED event + from mysql.event. + + SYNOPSIS + evex_load_events_from_db() + thd - Thread context. Used for memory allocation in some cases. + + RETURNS + 0 OK + !0 Error + + NOTES + Reports the error to the console +*/ + +static int +evex_load_events_from_db(THD *thd) +{ + TABLE *table; + READ_RECORD read_record_info; + int ret= -1; + uint count= 0; + + DBUG_ENTER("evex_load_events_from_db"); + + if ((ret= evex_open_event_table(thd, TL_READ, &table))) + { + sql_print_error("SCHEDULER: Table mysql.event is damaged. 
Can not open."); + DBUG_RETURN(SP_OPEN_TABLE_FAILED); + } + + VOID(pthread_mutex_lock(&LOCK_event_arrays)); + + init_read_record(&read_record_info, thd, table ,NULL,1,0); + while (!(read_record_info.read_record(&read_record_info))) + { + Event_timed *et; + if (!(et= new Event_timed)) + { + DBUG_PRINT("evex_load_events_from_db", ("Out of memory")); + ret= -1; + goto end; + } + DBUG_PRINT("evex_load_events_from_db", ("Loading event from row.")); + + if ((ret= et->load_from_row(&evex_mem_root, table))) + { + sql_print_error("SCHEDULER: Error while loading from mysql.event. " + "Table probably corrupted"); + goto end; + } + if (et->status != MYSQL_EVENT_ENABLED) + { + DBUG_PRINT("evex_load_events_from_db",("%s is disabled",et->name.str)); + delete et; + continue; + } + + DBUG_PRINT("evex_load_events_from_db", + ("Event %s loaded from row. Time to compile", et->name.str)); + + switch (ret= et->compile(thd, &evex_mem_root)) { + case EVEX_MICROSECOND_UNSUP: + sql_print_error("SCHEDULER: mysql.event is tampered. MICROSECOND is not " + "supported but found in mysql.event"); + goto end; + case EVEX_COMPILE_ERROR: + sql_print_error("SCHEDULER: Error while compiling %s.%s. Aborting load.", + et->dbname.str, et->name.str); + goto end; + default: + break; + } + + /* let's find when to be executed */ + if (et->compute_next_execution_time()) + { + sql_print_error("SCHEDULER: Error while computing execution time of %s.%s." + " Skipping", et->dbname.str, et->name.str); + continue; + } + + DBUG_PRINT("evex_load_events_from_db", ("Adding to the exec list.")); + + evex_queue_insert(&EVEX_EQ_NAME, (EVEX_PTOQEL) et); + DBUG_PRINT("evex_load_events_from_db", ("%p %*s", + et, et->name.length,et->name.str)); + count++; + } + + ret= 0; + +end: + VOID(pthread_mutex_unlock(&LOCK_event_arrays)); + end_read_record(&read_record_info); + + /* Force close to free memory */ + thd->version--; + + close_thread_tables(thd); + if (!ret) + sql_print_information("SCHEDULER: Loaded %d event%s", count, (count == 1)?"":"s"); + DBUG_PRINT("info", ("Status code %d. Loaded %d event(s)", ret, count)); + + DBUG_RETURN(ret); +} + + +/* + The update method of the global variable event_scheduler. + If event_scheduler is switched from 0 to 1 then the scheduler main + thread is started. + + SYNOPSIS + event_executor_worker() + thd - Thread context (unused) + car - the new value + + Returns + 0 OK (always) +*/ + +bool +sys_var_event_executor::update(THD *thd, set_var *var) +{ + /* here start the thread if not running. */ + DBUG_ENTER("sys_var_event_executor::update"); + VOID(pthread_mutex_lock(&LOCK_evex_running)); + *value= var->save_result.ulong_value; + + DBUG_PRINT("new_value", ("%d", *value)); + if ((my_bool) *value && !evex_is_running) + { + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + init_events(); + } else + VOID(pthread_mutex_unlock(&LOCK_evex_running)); + + DBUG_RETURN(0); +} + + +extern LEX_STRING warning_level_names[]; + +typedef void (*sql_print_xxx_func)(const char *format, ...); +static sql_print_xxx_func sql_print_xxx_handlers[3] = +{ + sql_print_information, + sql_print_warning, + sql_print_error +}; + + +/* + Prints the stack of infos, warnings, errors from thd to + the console so it can be fetched by the logs-into-tables and + checked later. 
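evex_load_events_from_db above finishes by inserting each compiled event into the priority queue, so the main loop only ever inspects the top element. The ordering invariant can be reproduced with std::priority_queue; the types below are illustrative, the server instead uses its own QUEUE with event_timed_compare_q:

#include <ctime>
#include <queue>
#include <string>
#include <vector>

struct event_t { std::string name; std::time_t execute_at; };

struct later_first
{
  /* "greater" comparator turns the heap into a min-heap on execute_at */
  bool operator()(const event_t &a, const event_t &b) const
  { return a.execute_at > b.execute_at; }
};

int main()
{
  std::priority_queue<event_t, std::vector<event_t>, later_first> q;
  q.push(event_t{"rotate_logs", 500});
  q.push(event_t{"backup",     1000});
  q.push(event_t{"analyze",    1500});
  while (!q.empty())
  {
    /* q.top() is always the next event due to fire; after running it,
       the server re-heapifies in place via queue_replaced() instead of
       popping and reinserting. */
    q.pop();
  }
  return 0;
}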
+ + Synopsis + evex_print_warnings + thd - thread used during the execution of the event + et - the event itself + + Returns + 0 - OK (always) + +*/ + +bool +evex_print_warnings(THD *thd, Event_timed *et) +{ + MYSQL_ERROR *err; + DBUG_ENTER("evex_show_warnings"); + char msg_buf[1024]; + char prefix_buf[512]; + String prefix(prefix_buf, sizeof(prefix_buf), system_charset_info); + prefix.length(0); + + List_iterator_fast<MYSQL_ERROR> it(thd->warn_list); + while ((err= it++)) + { + String err_msg(msg_buf, sizeof(msg_buf), system_charset_info); + /* set it to 0 or we start adding at the end. That's the trick ;) */ + err_msg.length(0); + if (!prefix.length()) + { + prefix.append("SCHEDULER: ["); + + append_identifier(thd,&prefix,et->definer_user.str,et->definer_user.length); + prefix.append('@'); + append_identifier(thd,&prefix,et->definer_host.str,et->definer_host.length); + prefix.append("][", 2); + append_identifier(thd,&prefix, et->dbname.str, et->dbname.length); + prefix.append('.'); + append_identifier(thd,&prefix, et->name.str, et->name.length); + prefix.append("] ", 2); + } + + err_msg.append(prefix); + err_msg.append(err->msg, strlen(err->msg), system_charset_info); + err_msg.append("]"); + DBUG_ASSERT(err->level < 3); + (sql_print_xxx_handlers[err->level])("%*s", err_msg.length(), err_msg.c_ptr()); + } + + + DBUG_RETURN(FALSE); +} diff --git a/sql/event_priv.h b/sql/event_priv.h new file mode 100644 index 00000000000..6b23136847e --- /dev/null +++ b/sql/event_priv.h @@ -0,0 +1,82 @@ +/* Copyright (C) 2004-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
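evex_print_warnings above routes each warning through sql_print_xxx_handlers, a table of printf-style functions indexed by warning level. The dispatch-table shape reduced to a standalone sketch (the handler names here are made up):

#include <cstdarg>
#include <cstdio>

typedef void (*print_func)(const char *fmt, ...);

static void print_note(const char *fmt, ...)
{
  va_list args;
  va_start(args, fmt);
  fprintf(stderr, "Note: ");
  vfprintf(stderr, fmt, args);
  va_end(args);
}

static void print_warning(const char *fmt, ...)
{
  va_list args;
  va_start(args, fmt);
  fprintf(stderr, "Warning: ");
  vfprintf(stderr, fmt, args);
  va_end(args);
}

static void print_error(const char *fmt, ...)
{
  va_list args;
  va_start(args, fmt);
  fprintf(stderr, "Error: ");
  vfprintf(stderr, fmt, args);
  va_end(args);
}

/* index == severity level, exactly as with sql_print_xxx_handlers */
static print_func handlers[3]= { print_note, print_warning, print_error };

int main()
{
  int level= 1;  /* 0 = note, 1 = warning, 2 = error */
  handlers[level]("%s\n", "event produced a warning");
  return 0;
}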
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _EVENT_PRIV_H_ +#define _EVENT_PRIV_H_ +#include "mysql_priv.h" + + +#define EVENT_EXEC_STARTED 0 +#define EVENT_EXEC_ALREADY_EXEC 1 +#define EVENT_EXEC_CANT_FORK 2 + +#define EVEX_USE_QUEUE + +#define UNLOCK_MUTEX_AND_BAIL_OUT(__mutex, __label) \ + { VOID(pthread_mutex_unlock(&__mutex)); goto __label; } + +#define EVEX_DB_FIELD_LEN 64 +#define EVEX_NAME_FIELD_LEN 64 +#define EVEX_MAX_INTERVAL_VALUE 2147483647L + +int +my_time_compare(TIME *a, TIME *b); + +int +evex_db_find_event_by_name(THD *thd, const LEX_STRING dbname, + const LEX_STRING ev_name, + const LEX_STRING user_name, + TABLE *table); + +int +event_timed_compare_q(void *vptr, byte* a, byte *b); + +int db_drop_event(THD *thd, Event_timed *et, bool drop_if_exists, + uint *rows_affected); + + +#define EXEC_QUEUE_QUEUE_NAME executing_queue +#define EXEC_QUEUE_DARR_NAME evex_executing_queue + + +#define EVEX_QUEUE_TYPE QUEUE +#define EVEX_PTOQEL byte * + +#define EVEX_EQ_NAME executing_queue +#define evex_queue_first_element(queue, __cast) ((__cast)queue_top(queue)) +#define evex_queue_element(queue, idx, __cast) ((__cast)queue_element(queue, idx)) +#define evex_queue_delete_element(queue, idx) queue_remove(queue, idx) +#define evex_queue_destroy(queue) delete_queue(queue) +#define evex_queue_first_updated(queue) queue_replaced(queue) +#define evex_queue_insert(queue, element) queue_insert_safe(queue, element); + + + +void +evex_queue_init(EVEX_QUEUE_TYPE *queue); + +#define evex_queue_num_elements(queue) queue.elements + + +extern bool evex_is_running; +extern MEM_ROOT evex_mem_root; +extern pthread_mutex_t LOCK_event_arrays, + LOCK_workers_count, + LOCK_evex_running; +extern ulonglong evex_main_thread_id; +extern QUEUE EVEX_EQ_NAME; + +#endif /* _EVENT_PRIV_H_ */ diff --git a/sql/event_timed.cc b/sql/event_timed.cc new file mode 100644 index 00000000000..a8620197668 --- /dev/null +++ b/sql/event_timed.cc @@ -0,0 +1,1523 @@ +/* Copyright (C) 2004-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
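event_priv.h above defines UNLOCK_MUTEX_AND_BAIL_OUT as a bare block; wrapping such a macro in do { } while (0) is the usual hardening so it behaves like a single statement after an unbraced if. A compilable sketch of the unlock-then-goto error path, with an illustrative function around it:

#include <pthread.h>

#define UNLOCK_AND_BAIL(mutex, label) \
  do { pthread_mutex_unlock(&(mutex)); goto label; } while (0)

static pthread_mutex_t work_lock= PTHREAD_MUTEX_INITIALIZER;

int guarded_work(int must_fail)
{
  pthread_mutex_lock(&work_lock);
  if (must_fail)
    UNLOCK_AND_BAIL(work_lock, err);  /* single statement, safe under if */
  /* ... useful work under the lock ... */
  pthread_mutex_unlock(&work_lock);
  return 0;
err:
  return -1;
}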
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "event_priv.h" +#include "event.h" +#include "sp.h" + + + +extern int yyparse(void *thd); + +/* + Init all member variables + + SYNOPSIS + Event_timed::init() +*/ + +void +Event_timed::init() +{ + DBUG_ENTER("Event_timed::init"); + + dbname.str= name.str= body.str= comment.str= 0; + dbname.length= name.length= body.length= comment.length= 0; + + set_zero_time(&starts, MYSQL_TIMESTAMP_DATETIME); + set_zero_time(&ends, MYSQL_TIMESTAMP_DATETIME); + set_zero_time(&execute_at, MYSQL_TIMESTAMP_DATETIME); + set_zero_time(&last_executed, MYSQL_TIMESTAMP_DATETIME); + starts_null= ends_null= execute_at_null= TRUE; + + definer_user.str= definer_host.str= 0; + definer_user.length= definer_host.length= 0; + + sql_mode= 0; + + DBUG_VOID_RETURN; +} + + +/* + Set a name of the event + + SYNOPSIS + Event_timed::init_name() + thd THD + spn the name extracted in the parser +*/ + +void +Event_timed::init_name(THD *thd, sp_name *spn) +{ + DBUG_ENTER("Event_timed::init_name"); + /* During parsing, we must use thd->mem_root */ + MEM_ROOT *root= thd->mem_root; + + /* We have to copy strings to get them into the right memroot */ + if (spn) + { + dbname.length= spn->m_db.length; + if (spn->m_db.length == 0) + dbname.str= NULL; + else + dbname.str= strmake_root(root, spn->m_db.str, spn->m_db.length); + name.length= spn->m_name.length; + name.str= strmake_root(root, spn->m_name.str, spn->m_name.length); + + if (spn->m_qname.length == 0) + spn->init_qname(thd); + } + else if (thd->db) + { + dbname.length= thd->db_length; + dbname.str= strmake_root(root, thd->db, dbname.length); + } + + DBUG_PRINT("dbname", ("len=%d db=%s",dbname.length, dbname.str)); + DBUG_PRINT("name", ("len=%d name=%s",name.length, name.str)); + + DBUG_VOID_RETURN; +} + + +/* + Set body of the event - what should be executed. + + SYNOPSIS + Event_timed::init_body() + thd THD + + NOTE + The body is extracted by copying all data between the + start of the body set by another method and the current pointer in Lex. +*/ + +void +Event_timed::init_body(THD *thd) +{ + DBUG_ENTER("Event_timed::init_body"); + DBUG_PRINT("info", ("body=[%s] body_begin=0x%ld end=0x%ld", body_begin, + body_begin, thd->lex->ptr)); + + body.length= thd->lex->ptr - body_begin; + /* Trim nuls at the end */ + while (body.length && body_begin[body.length-1] == '\0') + body.length--; + + /* the first is always whitespace which I cannot skip in the parser */ + while (my_isspace(thd->variables.character_set_client, *body_begin)) + { + ++body_begin; + --body.length; + } + body.str= strmake_root(thd->mem_root, (char *)body_begin, body.length); + + DBUG_VOID_RETURN; +} + + +/* + Set time for execution for one time events. 
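Event_timed::init_body above recovers the body as the span between two parser pointers, trimming the trailing NUL bytes and the leading whitespace the grammar cannot skip. The same steps as a standalone helper, sketched with std::string in place of strmake_root:

#include <cctype>
#include <string>

std::string extract_body(const char *begin, const char *end)
{
  std::size_t len= end - begin;
  while (len && begin[len - 1] == '\0')    /* trim trailing NUL bytes   */
    len--;
  while (len && std::isspace((unsigned char) *begin))  /* leading blanks */
  {
    begin++;
    len--;
  }
  return std::string(begin, len);
}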
+ + SYNOPSIS + Event_timed::init_execute_at() + expr when (datetime) + + RETURN VALUE + 0 OK + EVEX_PARSE_ERROR fix_fields failed + EVEX_BAD_PARAMS datetime is in the past + ER_WRONG_VALUE wrong value for execute at +*/ + +int +Event_timed::init_execute_at(THD *thd, Item *expr) +{ + my_bool not_used; + TIME ltime; + + TIME time_tmp; + DBUG_ENTER("Event_timed::init_execute_at"); + + if (expr->fix_fields(thd, &expr)) + DBUG_RETURN(EVEX_PARSE_ERROR); + + /* no starts and/or ends in case of execute_at */ + DBUG_PRINT("info", ("starts_null && ends_null should be 1 is %d", + (starts_null && ends_null))); + DBUG_ASSERT(starts_null && ends_null); + + /* let's check whether time is in the past */ + thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp, + (my_time_t) thd->query_start()); + + if ((not_used= expr->get_date(<ime, TIME_NO_ZERO_DATE))) + DBUG_RETURN(ER_WRONG_VALUE); + + if (TIME_to_ulonglong_datetime(<ime) < + TIME_to_ulonglong_datetime(&time_tmp)) + DBUG_RETURN(EVEX_BAD_PARAMS); + + + /* + This may result in a 1970-01-01 date if ltime is > 2037-xx-xx. + CONVERT_TZ has similar problem. + */ + my_tz_UTC->gmt_sec_to_TIME(<ime, TIME_to_timestamp(thd,<ime, ¬_used)); + + execute_at_null= FALSE; + execute_at= ltime; + DBUG_RETURN(0); +} + + +/* + Set time for execution for transient events. + + SYNOPSIS + Event_timed::init_interval() + expr how much? + new_interval what is the interval + + RETURNS + 0 OK + EVEX_PARSE_ERROR fix_fields failed + EVEX_BAD_PARAMS Interval is not positive + EVEX_MICROSECOND_UNSUP Microseconds are not supported. +*/ + +int +Event_timed::init_interval(THD *thd, Item *expr, interval_type new_interval) +{ + String value; + INTERVAL interval; + + DBUG_ENTER("Event_timed::init_interval"); + + if (expr->fix_fields(thd, &expr)) + DBUG_RETURN(EVEX_PARSE_ERROR); + + value.alloc(MAX_DATETIME_FULL_WIDTH*MY_CHARSET_BIN_MB_MAXLEN); + if (get_interval_value(expr, new_interval, &value, &interval)) + DBUG_RETURN(EVEX_PARSE_ERROR); + + expression= 0; + + switch (new_interval) { + case INTERVAL_YEAR: + expression= interval.year; + break; + case INTERVAL_QUARTER: + case INTERVAL_MONTH: + expression= interval.month; + break; + case INTERVAL_WEEK: + case INTERVAL_DAY: + expression= interval.day; + break; + case INTERVAL_HOUR: + expression= interval.hour; + break; + case INTERVAL_MINUTE: + expression= interval.minute; + break; + case INTERVAL_SECOND: + expression= interval.second; + break; + case INTERVAL_YEAR_MONTH: // Allow YEAR-MONTH YYYYYMM + expression= interval.year* 12 + interval.month; + break; + case INTERVAL_DAY_HOUR: + expression= interval.day* 24 + interval.hour; + break; + case INTERVAL_DAY_MINUTE: + expression= (interval.day* 24 + interval.hour) * 60 + interval.minute; + break; + case INTERVAL_HOUR_SECOND: /* day is anyway 0 */ + case INTERVAL_DAY_SECOND: + /* DAY_SECOND having problems because of leap seconds? 
*/ + expression= ((interval.day* 24 + interval.hour) * 60 + interval.minute)*60 + + interval.second; + break; + case INTERVAL_MINUTE_MICROSECOND: /* day and hour are 0 */ + case INTERVAL_HOUR_MICROSECOND: /* day is anyway 0 */ + case INTERVAL_DAY_MICROSECOND: + DBUG_RETURN(EVEX_MICROSECOND_UNSUP); + expression= ((((interval.day*24) + interval.hour)*60+interval.minute)*60 + + interval.second) * 1000000L + interval.second_part; + break; + case INTERVAL_HOUR_MINUTE: + expression= interval.hour * 60 + interval.minute; + break; + case INTERVAL_MINUTE_SECOND: + expression= interval.minute * 60 + interval.second; + break; + case INTERVAL_SECOND_MICROSECOND: + DBUG_RETURN(EVEX_MICROSECOND_UNSUP); + expression= interval.second * 1000000L + interval.second_part; + break; + case INTERVAL_MICROSECOND: + DBUG_RETURN(EVEX_MICROSECOND_UNSUP); + } + if (interval.neg || expression > EVEX_MAX_INTERVAL_VALUE) + DBUG_RETURN(EVEX_BAD_PARAMS); + + this->interval= new_interval; + DBUG_RETURN(0); +} + + +/* + Set activation time. + + SYNOPSIS + Event_timed::init_starts() + expr how much? + interval what is the interval + + NOTES + Note that activation time is not execution time. + EVERY 5 MINUTE STARTS "2004-12-12 10:00:00" means that + the event will be executed every 5 minutes but this will + start at the date shown above. Expressions are possible : + DATE_ADD(NOW(), INTERVAL 1 DAY) -- start tommorow at + same time. + + RETURNS + 0 OK + EVEX_PARSE_ERROR fix_fields failed +*/ + +int +Event_timed::init_starts(THD *thd, Item *new_starts) +{ + my_bool not_used; + TIME ltime, time_tmp; + + DBUG_ENTER("Event_timed::init_starts"); + + if (new_starts->fix_fields(thd, &new_starts)) + DBUG_RETURN(EVEX_PARSE_ERROR); + + if ((not_used= new_starts->get_date(<ime, TIME_NO_ZERO_DATE))) + DBUG_RETURN(EVEX_BAD_PARAMS); + + /* Let's check whether time is in the past */ + thd->variables.time_zone->gmt_sec_to_TIME(&time_tmp, + (my_time_t) thd->query_start()); + + DBUG_PRINT("info",("now =%lld", TIME_to_ulonglong_datetime(&time_tmp))); + DBUG_PRINT("info",("starts=%lld", TIME_to_ulonglong_datetime(<ime))); + if (TIME_to_ulonglong_datetime(<ime) < + TIME_to_ulonglong_datetime(&time_tmp)) + DBUG_RETURN(EVEX_BAD_PARAMS); + + /* + This may result in a 1970-01-01 date if ltime is > 2037-xx-xx + CONVERT_TZ has similar problem + */ + my_tz_UTC->gmt_sec_to_TIME(<ime, TIME_to_timestamp(thd, <ime, ¬_used)); + + starts= ltime; + starts_null= FALSE; + DBUG_RETURN(0); +} + + +/* + Set deactivation time. + + SYNOPSIS + Event_timed::init_ends() + thd THD + new_ends when? + + NOTES + Note that activation time is not execution time. + EVERY 5 MINUTE ENDS "2004-12-12 10:00:00" means that + the event will be executed every 5 minutes but this will + end at the date shown above. Expressions are possible : + DATE_ADD(NOW(), INTERVAL 1 DAY) -- end tommorow at + same time. + + RETURNS + 0 OK + EVEX_PARSE_ERROR fix_fields failed + EVEX_BAD_PARAMS ENDS before STARTS +*/ + +int +Event_timed::init_ends(THD *thd, Item *new_ends) +{ + TIME ltime, ltime_now; + my_bool not_used; + + DBUG_ENTER("Event_timed::init_ends"); + + if (new_ends->fix_fields(thd, &new_ends)) + DBUG_RETURN(EVEX_PARSE_ERROR); + + DBUG_PRINT("info", ("convert to TIME")); + if ((not_used= new_ends->get_date(<ime, TIME_NO_ZERO_DATE))) + DBUG_RETURN(EVEX_BAD_PARAMS); + + /* + This may result in a 1970-01-01 date if ltime is > 2037-xx-xx ? + CONVERT_TZ has similar problem ? 
+ */ + DBUG_PRINT("info", ("get the UTC time")); + my_tz_UTC->gmt_sec_to_TIME(<ime, TIME_to_timestamp(thd, <ime, ¬_used)); + + /* Check whether ends is after starts */ + DBUG_PRINT("info", ("ENDS after STARTS?")); + if (!starts_null && my_time_compare(&starts, <ime) != -1) + DBUG_RETURN(EVEX_BAD_PARAMS); + + /* + The parser forces starts to be provided but one day STARTS could be + set before NOW() and in this case the following check should be done. + Check whether ENDS is not in the past. + */ + DBUG_PRINT("info", ("ENDS after NOW?")); + my_tz_UTC->gmt_sec_to_TIME(<ime_now, thd->query_start()); + if (my_time_compare(<ime_now, <ime) == 1) + DBUG_RETURN(EVEX_BAD_PARAMS); + + ends= ltime; + ends_null= FALSE; + DBUG_RETURN(0); +} + + +/* + Sets comment. + + SYNOPSIS + Event_timed::init_comment() + thd THD - used for memory allocation + comment the string. +*/ + +void +Event_timed::init_comment(THD *thd, LEX_STRING *set_comment) +{ + DBUG_ENTER("Event_timed::init_comment"); + + comment.str= strmake_root(thd->mem_root, set_comment->str, + comment.length= set_comment->length); + + DBUG_VOID_RETURN; +} + + +/* + Inits definer (definer_user and definer_host) during parsing. + + SYNOPSIS + Event_timed::init_definer() +*/ + +int +Event_timed::init_definer(THD *thd) +{ + DBUG_ENTER("Event_timed::init_definer"); + + DBUG_PRINT("info",("init definer_user thd->mem_root=0x%lx " + "thd->sec_ctx->priv_user=0x%lx", thd->mem_root, + thd->security_ctx->priv_user)); + definer_user.str= strdup_root(thd->mem_root, thd->security_ctx->priv_user); + definer_user.length= strlen(thd->security_ctx->priv_user); + + DBUG_PRINT("info",("init definer_host thd->s_c->priv_host=0x%lx", + thd->security_ctx->priv_host)); + definer_host.str= strdup_root(thd->mem_root, thd->security_ctx->priv_host); + definer_host.length= strlen(thd->security_ctx->priv_host); + + DBUG_PRINT("info",("init definer as whole")); + definer.length= definer_user.length + definer_host.length + 1; + definer.str= alloc_root(thd->mem_root, definer.length + 1); + + DBUG_PRINT("info",("copy the user")); + memcpy(definer.str, definer_user.str, definer_user.length); + definer.str[definer_user.length]= '@'; + + DBUG_PRINT("info",("copy the host")); + memcpy(definer.str + definer_user.length + 1, definer_host.str, + definer_host.length); + definer.str[definer.length]= '\0'; + DBUG_PRINT("info",("definer initted")); + + DBUG_RETURN(0); +} + + +/* + Loads an event from a row from mysql.event + + SYNOPSIS + Event_timed::load_from_row(MEM_ROOT *mem_root, TABLE *table) + + NOTES + This method is silent on errors and should behave like that. Callers + should handle throwing of error messages. The reason is that the class + should not know about how to deal with communication. 
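init_definer above builds the definer as user, '@', host in a single allocation, and load_from_row later splits it again at the first '@'. Both directions in a compact sketch, with std::string standing in for the MEM_ROOT allocations:

#include <string>

std::string make_definer(const std::string &user, const std::string &host)
{
  return user + "@" + host;   /* one buffer: "user@host" */
}

/* Everything before the first '@' is the user, the rest is the host,
   matching what load_from_row does with strchr(). */
void split_definer(const std::string &definer,
                   std::string *user, std::string *host)
{
  std::string::size_type at= definer.find('@');
  if (at == std::string::npos)   /* no '@': treat it all as the user */
  {
    *user= definer;
    host->clear();
    return;
  }
  *user= definer.substr(0, at);
  *host= definer.substr(at + 1);
}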
+*/ + +int +Event_timed::load_from_row(MEM_ROOT *mem_root, TABLE *table) +{ + char *ptr; + Event_timed *et; + uint len; + bool res1, res2; + + DBUG_ENTER("Event_timed::load_from_row"); + + if (!table) + goto error; + + et= this; + + if (table->s->fields != EVEX_FIELD_COUNT) + goto error; + + if ((et->dbname.str= get_field(mem_root, + table->field[EVEX_FIELD_DB])) == NULL) + goto error; + + et->dbname.length= strlen(et->dbname.str); + + if ((et->name.str= get_field(mem_root, + table->field[EVEX_FIELD_NAME])) == NULL) + goto error; + + et->name.length= strlen(et->name.str); + + if ((et->body.str= get_field(mem_root, + table->field[EVEX_FIELD_BODY])) == NULL) + goto error; + + et->body.length= strlen(et->body.str); + + if ((et->definer.str= get_field(mem_root, + table->field[EVEX_FIELD_DEFINER])) == NullS) + goto error; + et->definer.length= strlen(et->definer.str); + + ptr= strchr(et->definer.str, '@'); + + if (! ptr) + ptr= et->definer.str; + + len= ptr - et->definer.str; + + et->definer_user.str= strmake_root(mem_root, et->definer.str, len); + et->definer_user.length= len; + len= et->definer.length - len - 1; //1 is because of @ + et->definer_host.str= strmake_root(mem_root, ptr + 1, len);/* 1:because of @*/ + et->definer_host.length= len; + + et->starts_null= table->field[EVEX_FIELD_STARTS]->is_null(); + res1= table->field[EVEX_FIELD_STARTS]->get_date(&et->starts,TIME_NO_ZERO_DATE); + + et->ends_null= table->field[EVEX_FIELD_ENDS]->is_null(); + res2= table->field[EVEX_FIELD_ENDS]->get_date(&et->ends, TIME_NO_ZERO_DATE); + + if (!table->field[EVEX_FIELD_INTERVAL_EXPR]->is_null()) + et->expression= table->field[EVEX_FIELD_INTERVAL_EXPR]->val_int(); + else + et->expression= 0; + /* + If res1 and res2 are true then both fields are empty. + Hence if EVEX_FIELD_EXECUTE_AT is empty there is an error. + */ + et->execute_at_null= table->field[EVEX_FIELD_EXECUTE_AT]->is_null(); + DBUG_ASSERT(!(et->starts_null && et->ends_null && !et->expression && + et->execute_at_null)); + if (!et->expression && + table->field[EVEX_FIELD_EXECUTE_AT]->get_date(&et->execute_at, + TIME_NO_ZERO_DATE)) + goto error; + + /* + In DB the values start from 1 but enum interval_type starts + from 0 + */ + if (!table->field[EVEX_FIELD_TRANSIENT_INTERVAL]->is_null()) + et->interval= (interval_type) + ((ulonglong) table->field[EVEX_FIELD_TRANSIENT_INTERVAL]->val_int() - 1); + else + et->interval= (interval_type) 0; + + et->created= table->field[EVEX_FIELD_CREATED]->val_int(); + et->modified= table->field[EVEX_FIELD_MODIFIED]->val_int(); + + /* + ToDo Andrey : Ask PeterG & Serg what to do in this case. + Whether on load last_executed_at should be loaded + or it must be 0ed. If last_executed_at is loaded + then an event can be scheduled for execution + instantly. Let's say an event has to be executed + every 15 mins. The server has been stopped for + more than this time and then started. If L_E_AT + is loaded from DB, execution at L_E_AT+15min + will be scheduled. However this time is in the past. + Hence immediate execution. Due to patch of + ::mark_last_executed() last_executed gets time_now + and not execute_at. If not like this a big + queue can be scheduled for times which are still in + the past (2, 3 and more executions which will be + consequent). + */ + set_zero_time(&last_executed, MYSQL_TIMESTAMP_DATETIME); +#ifdef ANDREY_0 + table->field[EVEX_FIELD_LAST_EXECUTED]-> + get_date(&et->last_executed, TIME_NO_ZERO_DATE); +#endif + last_executed_changed= false; + + /* ToDo : Andrey . 
Find a way not to allocate ptr on event_mem_root */ + if ((ptr= get_field(mem_root, table->field[EVEX_FIELD_STATUS])) == NullS) + goto error; + + DBUG_PRINT("load_from_row", ("Event [%s] is [%s]", et->name.str, ptr)); + et->status= (ptr[0]=='E'? MYSQL_EVENT_ENABLED:MYSQL_EVENT_DISABLED); + + /* ToDo : Andrey . Find a way not to allocate ptr on event_mem_root */ + if ((ptr= get_field(mem_root, + table->field[EVEX_FIELD_ON_COMPLETION])) == NullS) + goto error; + + et->on_completion= (ptr[0]=='D'? MYSQL_EVENT_ON_COMPLETION_DROP: + MYSQL_EVENT_ON_COMPLETION_PRESERVE); + + et->comment.str= get_field(mem_root, table->field[EVEX_FIELD_COMMENT]); + if (et->comment.str != NullS) + et->comment.length= strlen(et->comment.str); + else + et->comment.length= 0; + + + et->sql_mode= (ulong) table->field[EVEX_FIELD_SQL_MODE]->val_int(); + + DBUG_RETURN(0); +error: + DBUG_RETURN(EVEX_GET_FIELD_FAILED); +} + + +/* + Computes the sum of a timestamp plus interval + + SYNOPSIS + get_next_time(TIME *start, int interval_value, interval_type interval) + next the sum + start add interval_value to this time + i_value quantity of time type interval to add + i_type type of interval to add (SECOND, MINUTE, HOUR, WEEK ...) +*/ + +static +bool get_next_time(TIME *next, TIME *start, int i_value, interval_type i_type) +{ + bool ret; + INTERVAL interval; + TIME tmp; + + bzero(&interval, sizeof(interval)); + + switch (i_type) { + case INTERVAL_YEAR: + interval.year= (ulong) i_value; + break; + case INTERVAL_QUARTER: + interval.month= (ulong)(i_value*3); + break; + case INTERVAL_YEAR_MONTH: + case INTERVAL_MONTH: + interval.month= (ulong) i_value; + break; + case INTERVAL_WEEK: + interval.day= (ulong)(i_value*7); + break; + case INTERVAL_DAY: + interval.day= (ulong) i_value; + break; + case INTERVAL_DAY_HOUR: + case INTERVAL_HOUR: + interval.hour= (ulong) i_value; + break; + case INTERVAL_DAY_MINUTE: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_MINUTE: + interval.minute=i_value; + break; + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + interval.second=i_value; + break; + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + interval.second_part=i_value; + break; + } + tmp= *start; + if (!(ret= date_add_interval(&tmp, i_type, interval))) + *next= tmp; + + return ret; +} + + +/* + Computes next execution time. + + SYNOPSIS + Event_timed::compute_next_execution_time() + + NOTES + The time is set in execute_at, if no more executions the latter is set to + 0000-00-00. 
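get_next_time above delegates the carry handling to date_add_interval: adding 90 seconds to 23:59:30 must roll minutes, hours, and possibly the date. The standard library exposes the same normalization through mktime; a sketch only, since mktime works in local time while the scheduler computes in UTC:

#include <ctime>

/* Add `value` units to a broken-down time and let mktime() normalize
   the carries (seconds into minutes, days into months, ...). */
time_t add_interval(struct tm t, int value, char unit)
{
  switch (unit) {
  case 'Y': t.tm_year+= value; break;
  case 'M': t.tm_mon += value; break;
  case 'D': t.tm_mday+= value; break;
  case 'h': t.tm_hour+= value; break;
  case 'm': t.tm_min += value; break;
  case 's': t.tm_sec += value; break;
  }
  return mktime(&t);   /* normalizes all out-of-range members */
}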
+*/ + +bool +Event_timed::compute_next_execution_time() +{ + TIME time_now; + my_time_t now; + int tmp; + + DBUG_ENTER("Event_timed::compute_next_execution_time"); + + if (status == MYSQL_EVENT_DISABLED) + { + DBUG_PRINT("compute_next_execution_time", + ("Event %s is DISABLED", name.str)); + goto ret; + } + /* If one-time, no need to do computation */ + if (!expression) + { + /* Let's check whether it was executed */ + if (last_executed.year) + { + DBUG_PRINT("info",("One-time event %s.%s of was already executed", + dbname.str, name.str, definer.str)); + dropped= (on_completion == MYSQL_EVENT_ON_COMPLETION_DROP); + DBUG_PRINT("info",("One-time event will be dropped=%d.", dropped)); + + status= MYSQL_EVENT_DISABLED; + status_changed= true; + } + goto ret; + } + time((time_t *)&now); + my_tz_UTC->gmt_sec_to_TIME(&time_now, now); + +#ifdef ANDREY_0 + sql_print_information("[%s.%s]", dbname.str, name.str); + sql_print_information("time_now : [%d-%d-%d %d:%d:%d ]", + time_now.year, time_now.month, time_now.day, + time_now.hour, time_now.minute, time_now.second); + sql_print_information("starts : [%d-%d-%d %d:%d:%d ]", starts.year, + starts.month, starts.day, starts.hour, + starts.minute, starts.second); + sql_print_information("ends : [%d-%d-%d %d:%d:%d ]", ends.year, + ends.month, ends.day, ends.hour, + ends.minute, ends.second); + sql_print_information("m_last_ex: [%d-%d-%d %d:%d:%d ]", last_executed.year, + last_executed.month, last_executed.day, + last_executed.hour, last_executed.minute, + last_executed.second); +#endif + + /* if time_now is after ends don't execute anymore */ + if (!ends_null && (tmp= my_time_compare(&ends, &time_now)) == -1) + { + /* time_now is after ends. don't execute anymore */ + set_zero_time(&execute_at, MYSQL_TIMESTAMP_DATETIME); + execute_at_null= TRUE; + if (on_completion == MYSQL_EVENT_ON_COMPLETION_DROP) + dropped= true; + status= MYSQL_EVENT_DISABLED; + status_changed= true; + + goto ret; + } + + /* + Here time_now is before or equals ends if the latter is set. + Let's check whether time_now is before starts. + If so schedule for starts. + */ + if (!starts_null && (tmp= my_time_compare(&time_now, &starts)) < 1) + { + if (tmp == 0 && my_time_compare(&starts, &last_executed) == 0) + { + /* + time_now = starts = last_executed + do nothing or we will schedule for second time execution at starts. + */ + } + else + { + /* + starts is in the future + time_now before starts. Scheduling for starts + */ + execute_at= starts; + execute_at_null= FALSE; + goto ret; + } + } + + if (!starts_null && !ends_null) + { + /* + Both starts and m_ends are set and time_now is between them (incl.) + If last_executed is set then increase with m_expression. The new TIME is + after m_ends set execute_at to 0. And check for on_completion + If not set then schedule for now. + */ + if (!last_executed.year) + { + execute_at= time_now; + execute_at_null= FALSE; + } + else + { + TIME next_exec; + + if (get_next_time(&next_exec, &last_executed, expression, interval)) + goto err; + + /* There was previous execution */ + if (my_time_compare(&ends, &next_exec) == -1) + { + /* Next execution after ends. 
No more executions */
+        set_zero_time(&execute_at, MYSQL_TIMESTAMP_DATETIME);
+        execute_at_null= TRUE;
+        if (on_completion == MYSQL_EVENT_ON_COMPLETION_DROP)
+          dropped= true;
+      }
+      else
+      {
+        execute_at= next_exec;
+        execute_at_null= FALSE;
+      }
+    }
+    goto ret;
+  }
+  else if (starts_null && ends_null)
+  {
+    /*
+      Neither starts nor ends is set, so we schedule the next execution
+      based on last_executed.
+    */
+    if (last_executed.year)
+    {
+      if (get_next_time(&execute_at, &last_executed, expression, interval))
+        goto err;
+    }
+    else
+    {
+      /* last_executed not set. Schedule the event for now */
+      execute_at= time_now;
+    }
+    execute_at_null= FALSE;
+  }
+  else
+  {
+    /* either starts or ends is set */
+    if (!starts_null)
+    {
+      /*
+        - starts is set.
+        - starts is not in the future, according to the check made before.
+        Hence schedule for starts + expression in case last_executed is
+        not set, otherwise for last_executed + expression.
+      */
+      if (last_executed.year)
+      {
+        if (get_next_time(&execute_at, &last_executed, expression, interval))
+          goto err;
+      }
+      else
+        execute_at= starts;
+      execute_at_null= FALSE;
+    }
+    else
+    {
+      /*
+        - ends is set.
+        - ends is after time_now or equal to it.
+        Hence take last_executed and increment it with expression.
+        If last_executed is not set then schedule for now.
+      */
+
+      if (!last_executed.year)
+        execute_at= time_now;
+      else
+      {
+        TIME next_exec;
+
+        if (get_next_time(&next_exec, &last_executed, expression, interval))
+          goto err;
+
+        if (my_time_compare(&ends, &next_exec) == -1)
+        {
+          set_zero_time(&execute_at, MYSQL_TIMESTAMP_DATETIME);
+          execute_at_null= TRUE;
+          if (on_completion == MYSQL_EVENT_ON_COMPLETION_DROP)
+            dropped= true;
+        }
+        else
+        {
+          execute_at= next_exec;
+          execute_at_null= FALSE;
+        }
+      }
+    }
+    goto ret;
+  }
+ret:
+
+  DBUG_RETURN(false);
+err:
+  DBUG_RETURN(true);
+}
+
+
+/*
+  Sets the internal last_executed TIME struct to now, where now is the
+  time according to thd->query_start(), i.e. the THD's clock.
+
+  SYNOPSIS
+    Event_timed::mark_last_executed()
+      thd   thread context
+*/
+
+void
+Event_timed::mark_last_executed(THD *thd)
+{
+  TIME time_now;
+
+  thd->end_time();
+  my_tz_UTC->gmt_sec_to_TIME(&time_now, (my_time_t) thd->query_start());
+
+  last_executed= time_now; /* was execute_at */
+#ifdef ANDREY_0
+  last_executed= execute_at;
+#endif
+  last_executed_changed= true;
+}
+
+
+/*
+  Drops the event
+
+  SYNOPSIS
+    Event_timed::drop()
+      thd   thread context
+
+  RETURN VALUE
+    0   OK
+    -1  Cannot open mysql.event
+    -2  Cannot find the event in mysql.event (already deleted?)
+
+    others  return code from the SE in case deletion of the event row
+            failed.
+*/
+
+int
+Event_timed::drop(THD *thd)
+{
+  uint tmp= 0;
+  DBUG_ENTER("Event_timed::drop");
+
+  DBUG_RETURN(db_drop_event(thd, this, false, &tmp));
+}
+
+
+/*
+  Saves status and last_executed to disk if they have changed.
+
+  SYNOPSIS
+    Event_timed::update_fields()
+      thd - thread context
+
+  RETURN VALUE
+    0                     OK
+    SP_OPEN_TABLE_FAILED  Error while opening mysql.event for writing
+    EVEX_WRITE_ROW_FAILED Error while writing to disk
+
+    others                return code from the SE in case the update of
+                          the event row failed.
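
The branching in compute_next_execution_time() reduces to a small decision table over starts, ends and last_executed. A condensed model with plain time_t values, where 0 means "not set" and 'period' stands in for expression+interval (a sketch of the logic, not the server code):

    #include <ctime>

    /* Returns 0 when there are no more executions. */
    std::time_t next_execution(std::time_t now, std::time_t starts,
                               std::time_t ends, std::time_t last_executed,
                               std::time_t period)
    {
      if (ends && now > ends)
        return 0;                        /* past 'ends': disable, maybe drop */
      if (starts && now < starts)
        return starts;                   /* not started yet: first run at starts */
      if (!last_executed)
        return starts ? starts : now;    /* never ran: starts if set, else now */
      std::time_t next= last_executed + period;
      if (ends && next > ends)
        return 0;                        /* next run would land after 'ends' */
      return next;
    }
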
+*/
+
+bool
+Event_timed::update_fields(THD *thd)
+{
+  TABLE *table;
+  Open_tables_state backup;
+  int ret= 0;
+
+  DBUG_ENTER("Event_timed::update_fields");
+
+  DBUG_PRINT("enter", ("name: %*s", name.length, name.str));
+
+  /* No need to update if nothing has changed */
+  if (!(status_changed || last_executed_changed))
+    goto done;
+
+  thd->reset_n_backup_open_tables_state(&backup);
+
+  if (evex_open_event_table(thd, TL_WRITE, &table))
+  {
+    ret= SP_OPEN_TABLE_FAILED;
+    goto done;
+  }
+
+
+  if ((ret= evex_db_find_event_by_name(thd, dbname, name, definer, table)))
+    goto done;
+
+  store_record(table,record[1]);
+  /* Don't update the 'created' timestamp on row update. */
+  table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+
+  if (last_executed_changed)
+  {
+    table->field[EVEX_FIELD_LAST_EXECUTED]->set_notnull();
+    table->field[EVEX_FIELD_LAST_EXECUTED]->store_time(&last_executed,
+                                                       MYSQL_TIMESTAMP_DATETIME);
+    last_executed_changed= false;
+  }
+  if (status_changed)
+  {
+    table->field[EVEX_FIELD_STATUS]->set_notnull();
+    table->field[EVEX_FIELD_STATUS]->store((longlong)status, true);
+    status_changed= false;
+  }
+
+  if ((table->file->ha_update_row(table->record[1],table->record[0])))
+    ret= EVEX_WRITE_ROW_FAILED;
+
+done:
+  close_thread_tables(thd);
+  thd->restore_backup_open_tables_state(&backup);
+
+  DBUG_RETURN(ret);
+}
+
+extern LEX_STRING interval_type_to_name[];
+
+/*
+  Get SHOW CREATE EVENT as a string
+
+  SYNOPSIS
+    Event_timed::get_create_event(THD *thd, String *buf)
+      thd  Thread
+      buf  String*, should be already allocated. The CREATE EVENT
+           statement is appended to it.
+
+  RETURN VALUE
+    0                       OK
+    EVEX_MICROSECOND_UNSUP  Error (for now, if mysql.event has been
+                            tampered with and a MICROSECOND interval or
+                            a derivative has been put there).
+*/
+
+int
+Event_timed::get_create_event(THD *thd, String *buf)
+{
+  int multipl= 0;
+  char tmp_buff[128];
+  String expr_buf(tmp_buff, sizeof(tmp_buff), system_charset_info);
+  expr_buf.length(0);
+
+  DBUG_ENTER("get_create_event");
+  DBUG_PRINT("ret_info",("body_len=[%d]body=[%s]", body.length, body.str));
+
+  if (expression &&
+      event_reconstruct_interval_expression(&expr_buf, interval, expression))
+    DBUG_RETURN(EVEX_MICROSECOND_UNSUP);
+
+  buf->append(STRING_WITH_LEN("CREATE EVENT "));
+  append_identifier(thd, buf, dbname.str, dbname.length);
+  buf->append(STRING_WITH_LEN("."));
+  append_identifier(thd, buf, name.str, name.length);
+
+  buf->append(STRING_WITH_LEN(" ON SCHEDULE "));
+  if (expression)
+  {
+    buf->append(STRING_WITH_LEN("EVERY "));
+    buf->append(expr_buf);
+    buf->append(' ');
+    LEX_STRING *ival= &interval_type_to_name[interval];
+    buf->append(ival->str, ival->length);
+  }
+  else
+  {
+    char dtime_buff[20*2+32];/* +32 to make my_snprintf_{8bit|ucs2} happy */
+    buf->append(STRING_WITH_LEN("AT '"));
+    /*
+      Pass the buffer; my_datetime_to_str() fills it and returns the
+      number of chars to copy.
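
Seen from the outside, update_fields() amounts to a narrow UPDATE of one row in mysql.event. A hypothetical helper that builds the equivalent statement (values are left unescaped for brevity; the server itself of course updates the row through the handler interface rather than re-parsing SQL):

    #include <string>

    std::string update_event_sql(const std::string &db, const std::string &name,
                                 bool update_last_executed, bool update_status,
                                 int status)
    {
      std::string sql("UPDATE mysql.event SET ");
      bool first= true;
      if (update_last_executed)          /* mirrors last_executed_changed */
      {
        sql+= "last_executed= UTC_TIMESTAMP()";
        first= false;
      }
      if (update_status)                 /* mirrors status_changed */
      {
        if (!first)
          sql+= ", ";
        sql+= "status= " + std::to_string(status);
      }
      sql+= " WHERE db='" + db + "' AND name='" + name + "'";
      return sql;
    }
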
+    */
+    buf->append(dtime_buff, my_datetime_to_str(&execute_at, dtime_buff));
+    buf->append(STRING_WITH_LEN("'"));
+  }
+
+  if (on_completion == MYSQL_EVENT_ON_COMPLETION_DROP)
+    buf->append(STRING_WITH_LEN(" ON COMPLETION NOT PRESERVE "));
+  else
+    buf->append(STRING_WITH_LEN(" ON COMPLETION PRESERVE "));
+
+  if (status == MYSQL_EVENT_ENABLED)
+    buf->append(STRING_WITH_LEN("ENABLE"));
+  else
+    buf->append(STRING_WITH_LEN("DISABLE"));
+
+  if (comment.length)
+  {
+    buf->append(STRING_WITH_LEN(" COMMENT "));
+    append_unescaped(buf, comment.str, comment.length);
+  }
+  buf->append(STRING_WITH_LEN(" DO "));
+  buf->append(body.str, body.length);
+
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Executes the event (the underlying sp_head object).
+
+  SYNOPSIS
+    Event_timed::execute()
+      thd       THD
+      mem_root  If != NULL use it to compile the event on it
+
+  RETURNS
+    0       success
+    -99     the definer has no rights on dbname.str
+    -100    event in execution (parallel execution is impossible)
+    others  retcodes of sp_head::execute_procedure()
+*/
+
+int
+Event_timed::execute(THD *thd, MEM_ROOT *mem_root)
+{
+  Security_context *save_ctx;
+  /* this one is local and not needed after exec */
+  Security_context security_ctx;
+  int ret= 0;
+
+  DBUG_ENTER("Event_timed::execute");
+  DBUG_PRINT("info", ("    EVEX EXECUTING event %s.%s [EXPR:%d]",
+                      dbname.str, name.str, (int) expression));
+
+  VOID(pthread_mutex_lock(&this->LOCK_running));
+  if (running)
+  {
+    VOID(pthread_mutex_unlock(&this->LOCK_running));
+    DBUG_RETURN(-100);
+  }
+  running= true;
+  VOID(pthread_mutex_unlock(&this->LOCK_running));
+
+  DBUG_PRINT("info", ("master_access=%d db_access=%d",
+             thd->security_ctx->master_access, thd->security_ctx->db_access));
+  change_security_context(thd, &security_ctx, &save_ctx);
+  DBUG_PRINT("info", ("master_access=%d db_access=%d",
+             thd->security_ctx->master_access, thd->security_ctx->db_access));
+
+  if (!sphead && (ret= compile(thd, mem_root)))
+    goto done;
+  /* Now we are sure we have a valid this->sphead so we can copy the context */
+  sphead->m_security_ctx= security_ctx;
+  /*
+    THD::~THD will clean this up, or if there is DROP DATABASE in the SP then
+    it will be freed there. It should not point to our buffer, which is
+    allocated on a mem_root.
+  */
+  thd->db= my_strdup(dbname.str, MYF(0));
+  thd->db_length= dbname.length;
+  if (!check_access(thd, EVENT_ACL,dbname.str, 0, 0, 0,is_schema_db(dbname.str)))
+  {
+    List<Item> empty_item_list;
+    empty_item_list.empty();
+    if (thd->enable_slow_log)
+      sphead->m_flags|= sp_head::LOG_SLOW_STATEMENTS;
+    sphead->m_flags|= sp_head::LOG_GENERAL_LOG;
+
+    ret= sphead->execute_procedure(thd, &empty_item_list);
+  }
+  else
+  {
+    DBUG_PRINT("error", ("%s@%s has no rights on %s", definer_user.str,
+                         definer_host.str, dbname.str));
+    ret= -99;
+  }
+  restore_security_context(thd, save_ctx);
+  DBUG_PRINT("info", ("master_access=%d db_access=%d",
+             thd->security_ctx->master_access, thd->security_ctx->db_access));
+
+  VOID(pthread_mutex_lock(&this->LOCK_running));
+  running= false;
+  VOID(pthread_mutex_unlock(&this->LOCK_running));
+
+done:
+  /*
+    1. Don't cache sphead if allocated on another mem_root.
+    2. Don't call security_ctx.destroy() because this would free our
+       dbname.str, name.str and definer.str.
+  */
+  if (mem_root && sphead)
+  {
+    delete sphead;
+    sphead= 0;
+  }
+  DBUG_PRINT("info", ("    EVEX EXECUTED event %s.%s [EXPR:%d].
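
get_create_event() is plain string assembly. A simplified standalone version with std::string, using a hypothetical flat EventDef struct in place of Event_timed, shows the shape of the generated DDL:

    #include <string>

    struct EventDef                       /* hypothetical flat view of the event */
    {
      std::string db, name, body, comment;
      std::string every_expr, interval_name;  /* e.g. "1", "HOUR"; empty if one-time */
      std::string at_datetime;                /* e.g. "2006-02-28 10:00:00" */
      bool preserve, enabled;
    };

    std::string create_event_ddl(const EventDef &e)
    {
      std::string buf("CREATE EVENT `" + e.db + "`.`" + e.name + "` ON SCHEDULE ");
      if (!e.every_expr.empty())
        buf+= "EVERY " + e.every_expr + " " + e.interval_name;
      else
        buf+= "AT '" + e.at_datetime + "'";
      buf+= e.preserve ? " ON COMPLETION PRESERVE " : " ON COMPLETION NOT PRESERVE ";
      buf+= e.enabled ? "ENABLE" : "DISABLE";
      if (!e.comment.empty())
        buf+= " COMMENT '" + e.comment + "'";   /* unescaped for brevity */
      buf+= " DO " + e.body;
      return buf;
    }
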
RetCode=%d", + dbname.str, name.str, (int) expression, ret)); + + DBUG_RETURN(ret); +} + + +/* + Switches the security context + Synopsis + Event_timed::change_security_context() + thd - thread + backup - where to store the old context + + RETURN + 0 - OK + 1 - Error (generates error too) +*/ +bool +Event_timed::change_security_context(THD *thd, Security_context *s_ctx, + Security_context **backup) +{ + DBUG_ENTER("Event_timed::change_security_context"); + DBUG_PRINT("info",("%s@%s@%s",definer_user.str,definer_host.str, dbname.str)); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + s_ctx->init(); + *backup= 0; + if (acl_getroot_no_password(s_ctx, definer_user.str, definer_host.str, + definer_host.str, dbname.str)) + { + my_error(ER_NO_SUCH_USER, MYF(0), definer_user.str, definer_host.str); + DBUG_RETURN(TRUE); + } + *backup= thd->security_ctx; + thd->security_ctx= s_ctx; +#endif + DBUG_RETURN(FALSE); +} + + +/* + Restores the security context + Synopsis + Event_timed::restore_security_context() + thd - thread + backup - switch to this context +*/ + +void +Event_timed::restore_security_context(THD *thd, Security_context *backup) +{ + DBUG_ENTER("Event_timed::restore_security_context"); +#ifndef NO_EMBEDDED_ACCESS_CHECKS + if (backup) + thd->security_ctx= backup; +#endif + DBUG_VOID_RETURN; +} + + +/* + Compiles an event before it's execution. Compiles the anonymous + sp_head object held by the event + + SYNOPSIS + Event_timed::compile() + thd thread context, used for memory allocation mostly + mem_root if != NULL then this memory root is used for allocs + instead of thd->mem_root + + RETURN VALUE + 0 success + EVEX_COMPILE_ERROR error during compilation + EVEX_MICROSECOND_UNSUP mysql.event was tampered +*/ + +int +Event_timed::compile(THD *thd, MEM_ROOT *mem_root) +{ + int ret= 0; + MEM_ROOT *tmp_mem_root= 0; + LEX *old_lex= thd->lex, lex; + char *old_db; + int old_db_length; + char *old_query; + uint old_query_len; + ulong old_sql_mode= thd->variables.sql_mode; + char create_buf[2048]; + String show_create(create_buf, sizeof(create_buf), system_charset_info); + CHARSET_INFO *old_character_set_client, + *old_collation_connection, + *old_character_set_results; + + DBUG_ENTER("Event_timed::compile"); + + show_create.length(0); + + switch (get_create_event(thd, &show_create)) { + case EVEX_MICROSECOND_UNSUP: + sql_print_error("Scheduler"); + DBUG_RETURN(EVEX_MICROSECOND_UNSUP); + case 0: + break; + default: + DBUG_ASSERT(0); + } + + old_character_set_client= thd->variables.character_set_client; + old_character_set_results= thd->variables.character_set_results; + old_collation_connection= thd->variables.collation_connection; + + thd->variables.character_set_client= + thd->variables.character_set_results= + thd->variables.collation_connection= + get_charset_by_csname("utf8", MY_CS_PRIMARY, MYF(MY_WME)); + + thd->update_charset(); + + DBUG_PRINT("info",("old_sql_mode=%d new_sql_mode=%d",old_sql_mode, sql_mode)); + thd->variables.sql_mode= this->sql_mode; + /* Change the memory root for the execution time */ + if (mem_root) + { + tmp_mem_root= thd->mem_root; + thd->mem_root= mem_root; + } + old_query_len= thd->query_length; + old_query= thd->query; + old_db= thd->db; + old_db_length= thd->db_length; + thd->db= dbname.str; + thd->db_length= dbname.length; + + thd->query= show_create.c_ptr(); + thd->query_length= show_create.length(); + DBUG_PRINT("Event_timed::compile", ("query:%s",thd->query)); + + thd->lex= &lex; + lex_start(thd, (uchar*)thd->query, thd->query_length); + lex.et_compile_phase= TRUE; + if 
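
compile() saves thd->query, thd->db, sql_mode and the connection charsets, switches them for parsing, and must restore them on every exit path. That save-switch-restore pattern is sketched below as an RAII guard over two representative fields (a hypothetical illustration; the server code of this era restores the fields by hand before DBUG_RETURN):

    struct ThreadStateGuard
    {
      const char   **query_slot;          /* the fields we temporarily override */
      const char    *old_query;
      unsigned long *mode_slot;
      unsigned long  old_mode;

      ThreadStateGuard(const char **query_field, const char *event_query,
                       unsigned long *mode_field, unsigned long event_mode)
        : query_slot(query_field), old_query(*query_field),
          mode_slot(mode_field), old_mode(*mode_field)
      {
        *query_slot= event_query;   /* parse the event's text under its own mode */
        *mode_slot= event_mode;
      }
      ~ThreadStateGuard()
      {
        *query_slot= old_query;     /* runs on every exit path, unlike 'goto done' */
        *mode_slot= old_mode;
      }
    };
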
(yyparse((void *)thd) || thd->is_fatal_error) + { + DBUG_PRINT("error", ("error during compile or thd->is_fatal_error=%d", + thd->is_fatal_error)); + /* + Free lex associated resources + QQ: Do we really need all this stuff here? + */ + sql_print_error("error during compile of %s.%s or thd->is_fatal_error=%d", + dbname.str, name.str, thd->is_fatal_error); + if (lex.sphead) + { + if (&lex != thd->lex) + thd->lex->sphead->restore_lex(thd); + delete lex.sphead; + lex.sphead= 0; + } + ret= EVEX_COMPILE_ERROR; + goto done; + } + DBUG_PRINT("note", ("success compiling %s.%s", dbname.str, name.str)); + + sphead= lex.et->sphead; + sphead->m_db= dbname; + + sphead->set_definer(definer.str, definer.length); + sphead->set_info(0, 0, &lex.sp_chistics, sql_mode); + sphead->optimize(); + ret= 0; +done: + lex.et->free_sphead_on_delete= false; + delete lex.et; + lex_end(&lex); + DBUG_PRINT("note", ("return old data on its place. set back NAMES")); + + thd->lex= old_lex; + thd->query= old_query; + thd->query_length= old_query_len; + thd->db= old_db; + + thd->variables.sql_mode= old_sql_mode; + thd->variables.character_set_client= old_character_set_client; + thd->variables.character_set_results= old_character_set_results; + thd->variables.collation_connection= old_collation_connection; + thd->update_charset(); + + /* Change the memory root for the execution time. */ + if (mem_root) + thd->mem_root= tmp_mem_root; + + DBUG_RETURN(ret); +} + + +/* + Checks whether this thread can lock the object for modification -> + preventing being spawned for execution, and locks if possible. + use ::can_spawn_now() only for basic checking because a race + condition may occur between the check and eventual modification (deletion) + of the object. + + Returns + true - locked + false - cannot lock +*/ + +my_bool +Event_timed::can_spawn_now_n_lock(THD *thd) +{ + my_bool ret= FALSE; + VOID(pthread_mutex_lock(&this->LOCK_running)); + if (!in_spawned_thread) + { + in_spawned_thread= TRUE; + ret= TRUE; + locked_by_thread_id= thd->thread_id; + } + VOID(pthread_mutex_unlock(&this->LOCK_running)); + return ret; +} + + +extern pthread_attr_t connection_attrib; + +/* + Checks whether is possible and forks a thread. Passes self as argument. + + Returns + EVENT_EXEC_STARTED - OK + EVENT_EXEC_ALREADY_EXEC - Thread not forked, already working + EVENT_EXEC_CANT_FORK - Unable to spawn thread (error) +*/ + +int +Event_timed::spawn_now(void * (*thread_func)(void*)) +{ + int ret= EVENT_EXEC_STARTED; + static uint exec_num= 0; + DBUG_ENTER("Event_timed::spawn_now"); + DBUG_PRINT("info", ("[%s.%s]", dbname.str, name.str)); + + VOID(pthread_mutex_lock(&this->LOCK_running)); + if (!in_spawned_thread) + { + pthread_t th; + in_spawned_thread= true; + if (pthread_create(&th, &connection_attrib, thread_func, (void*)this)) + { + DBUG_PRINT("info", ("problem while spawning thread")); + ret= EVENT_EXEC_CANT_FORK; + in_spawned_thread= false; + } +#ifndef DBUG_OFF + else + { + sql_print_information("SCHEDULER: Started thread %d", ++exec_num); + DBUG_PRINT("info", ("thread spawned")); + } +#endif + } + else + { + DBUG_PRINT("info", ("already in spawned thread. 
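
spawn_now() boils down to "spawn at most one worker per object, guarded by a mutex". A minimal standalone model with POSIX threads (OneShotJob is illustrative; clearing in_flight at the end of the worker corresponds to what spawn_thread_finish() does):

    #include <pthread.h>

    struct OneShotJob
    {
      pthread_mutex_t lock;
      bool in_flight;

      OneShotJob() : in_flight(false) { pthread_mutex_init(&lock, NULL); }

      /* 0: started, 1: already running, -1: could not spawn */
      int spawn(void *(*fn)(void *))
      {
        int ret= 0;
        pthread_mutex_lock(&lock);
        if (in_flight)
          ret= 1;                          /* EVENT_EXEC_ALREADY_EXEC analogue */
        else
        {
          pthread_t th;
          in_flight= true;
          if (pthread_create(&th, NULL, fn, this))
          {
            in_flight= false;              /* roll back the claim on failure */
            ret= -1;                       /* EVENT_EXEC_CANT_FORK analogue */
          }
          else
            pthread_detach(th);            /* worker clears in_flight itself */
        }
        pthread_mutex_unlock(&lock);
        return ret;
      }
    };
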
skipping")); + ret= EVENT_EXEC_ALREADY_EXEC; + } + VOID(pthread_mutex_unlock(&this->LOCK_running)); + + DBUG_RETURN(ret); +} + + +void +Event_timed::spawn_thread_finish(THD *thd) +{ + DBUG_ENTER("Event_timed::spawn_thread_finish"); + VOID(pthread_mutex_lock(&this->LOCK_running)); + in_spawned_thread= false; + if ((flags & EVENT_EXEC_NO_MORE) || status == MYSQL_EVENT_DISABLED) + { + DBUG_PRINT("info", ("%s exec no more. to drop=%d", name.str, dropped)); + if (dropped) + drop(thd); + VOID(pthread_mutex_unlock(&this->LOCK_running)); + delete this; + DBUG_VOID_RETURN; + } + VOID(pthread_mutex_unlock(&this->LOCK_running)); + DBUG_VOID_RETURN; +} + + +/* + Unlocks the object after it has been locked with ::can_spawn_now_n_lock() + + Returns + 0 - ok + 1 - not locked by this thread +*/ + +int +Event_timed::spawn_unlock(THD *thd) +{ + int ret= 0; + VOID(pthread_mutex_lock(&this->LOCK_running)); + if (!in_spawned_thread) + { + if (locked_by_thread_id == thd->thread_id) + { + in_spawned_thread= FALSE; + locked_by_thread_id= 0; + } + else + { + sql_print_error("A thread tries to unlock when he hasn't locked. " + "thread_id=%ld locked by %ld", + thd->thread_id, locked_by_thread_id); + DBUG_ASSERT(0); + ret= 1; + } + } + VOID(pthread_mutex_unlock(&this->LOCK_running)); + return ret; +} diff --git a/sql/examples/ha_example.cc b/sql/examples/ha_example.cc deleted file mode 100644 index 471ece77490..00000000000 --- a/sql/examples/ha_example.cc +++ /dev/null @@ -1,701 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - ha_example is a stubbed storage engine. It does nothing at this point. It - will let you create/open/delete tables but that is all. You can enable it - in your buld by doing the following during your build process: - ./configure --with-example-storage-engine - - Once this is done mysql will let you create tables with: - CREATE TABLE A (...) ENGINE=EXAMPLE; - - The example is setup to use table locks. It implements an example "SHARE" - that is inserted into a hash by table name. You can use this to store - information of state that any example handler object will be able to see - if it is using the same table. - - Please read the object definition in ha_example.h before reading the rest - if this file. 
- - To get an idea of what occurs here is an example select that would do a - scan of an entire table: - ha_example::store_lock - ha_example::external_lock - ha_example::info - ha_example::rnd_init - ha_example::extra - ENUM HA_EXTRA_CACHE Cash record in HA_rrnd() - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::rnd_next - ha_example::extra - ENUM HA_EXTRA_NO_CACHE End cacheing of records (def) - ha_example::external_lock - ha_example::extra - ENUM HA_EXTRA_RESET Reset database to after open - - In the above example has 9 row called before rnd_next signalled that it was - at the end of its data. In the above example the table was already opened - (or you would have seen a call to ha_example::open(). Calls to - ha_example::extra() are hints as to what will be occuring to the request. - - Happy coding! - -Brian -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "../mysql_priv.h" - -#ifdef HAVE_EXAMPLE_DB -#include "ha_example.h" - - -handlerton example_hton= { - "EXAMPLE", - SHOW_OPTION_YES, - "Example storage engine", - DB_TYPE_EXAMPLE_DB, - NULL, /* We do need to write one! */ - 0, /* slot */ - 0, /* savepoint size. */ - NULL, /* close_connection */ - NULL, /* savepoint */ - NULL, /* rollback to savepoint */ - NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE -}; - -/* Variables for example share methods */ -static HASH example_open_tables; // Hash used to track open tables -pthread_mutex_t example_mutex; // This is the mutex we use to init the hash -static int example_init= 0; // Variable for checking the init state of hash - - -/* - Function we use in the creation of our hash to get key. -*/ -static byte* example_get_key(EXAMPLE_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (byte*) share->table_name; -} - - -/* - Example of simple lock controls. The "share" it creates is structure we will - pass to each example handler. Do you have to have one of these? Well, you have - pieces that are used for locking, and they are needed to function. -*/ -static EXAMPLE_SHARE *get_share(const char *table_name, TABLE *table) -{ - EXAMPLE_SHARE *share; - uint length; - char *tmp_name; - - /* - So why does this exist? There is no way currently to init a storage engine. - Innodb and BDB both have modifications to the server to allow them to - do this. Since you will not want to do this, this is probably the next - best method. 
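
The share lookup described here is a refcounted registry keyed by table name. Its generic shape, sketched with std::map and a pthread mutex (illustrative names; the real code uses the mysys HASH and my_multi_malloc instead):

    #include <map>
    #include <string>
    #include <pthread.h>

    struct Share
    {
      unsigned use_count;
      Share() : use_count(0) {}
    };

    static std::map<std::string, Share*> open_shares;
    static pthread_mutex_t shares_lock= PTHREAD_MUTEX_INITIALIZER;

    Share *get_share(const std::string &name)
    {
      pthread_mutex_lock(&shares_lock);
      Share *&slot= open_shares[name];   /* find-or-create under the lock */
      if (!slot)
        slot= new Share();               /* first opener allocates the share */
      slot->use_count++;
      pthread_mutex_unlock(&shares_lock);
      return slot;
    }

    void free_share(const std::string &name)
    {
      pthread_mutex_lock(&shares_lock);
      std::map<std::string, Share*>::iterator it= open_shares.find(name);
      if (it != open_shares.end() && --it->second->use_count == 0)
      {
        delete it->second;               /* last closer tears the share down */
        open_shares.erase(it);
      }
      pthread_mutex_unlock(&shares_lock);
    }
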
- */ - if (!example_init) - { - /* Hijack a mutex for init'ing the storage engine */ - pthread_mutex_lock(&LOCK_mysql_create_db); - if (!example_init) - { - example_init++; - VOID(pthread_mutex_init(&example_mutex,MY_MUTEX_INIT_FAST)); - (void) hash_init(&example_open_tables,system_charset_info,32,0,0, - (hash_get_key) example_get_key,0,0); - } - pthread_mutex_unlock(&LOCK_mysql_create_db); - } - pthread_mutex_lock(&example_mutex); - length=(uint) strlen(table_name); - - if (!(share=(EXAMPLE_SHARE*) hash_search(&example_open_tables, - (byte*) table_name, - length))) - { - if (!(share=(EXAMPLE_SHARE *) - my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &share, sizeof(*share), - &tmp_name, length+1, - NullS))) - { - pthread_mutex_unlock(&example_mutex); - return NULL; - } - - share->use_count=0; - share->table_name_length=length; - share->table_name=tmp_name; - strmov(share->table_name,table_name); - if (my_hash_insert(&example_open_tables, (byte*) share)) - goto error; - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - } - share->use_count++; - pthread_mutex_unlock(&example_mutex); - - return share; - -error: - pthread_mutex_destroy(&share->mutex); - pthread_mutex_unlock(&example_mutex); - my_free((gptr) share, MYF(0)); - - return NULL; -} - - -/* - Free lock controls. We call this whenever we close a table. If the table had - the last reference to the share then we free memory associated with it. -*/ -static int free_share(EXAMPLE_SHARE *share) -{ - pthread_mutex_lock(&example_mutex); - if (!--share->use_count) - { - hash_delete(&example_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&example_mutex); - - return 0; -} - - -ha_example::ha_example(TABLE *table_arg) - :handler(&example_hton, table_arg) -{} - -/* - If frm_error() is called then we will use this to to find out what file extentions - exist for the storage engine. This is also used by the default rename_table and - delete_table method in handler.cc. -*/ -static const char *ha_example_exts[] = { - NullS -}; - -const char **ha_example::bas_ext() const -{ - return ha_example_exts; -} - - -/* - Used for opening tables. The name will be the name of the file. - A table is opened when it needs to be opened. For instance - when a request comes in for a select on the table (tables are not - open and closed for each request, they are cached). - - Called from handler.cc by handler::ha_open(). The server opens all tables by - calling ha_open() which then calls the handler specific open(). -*/ -int ha_example::open(const char *name, int mode, uint test_if_locked) -{ - DBUG_ENTER("ha_example::open"); - - if (!(share = get_share(name, table))) - DBUG_RETURN(1); - thr_lock_data_init(&share->lock,&lock,NULL); - - DBUG_RETURN(0); -} - - -/* - Closes a table. We call the free_share() function to free any resources - that we have allocated in the "shared" structure. - - Called from sql_base.cc, sql_select.cc, and table.cc. - In sql_select.cc it is only used to close up temporary tables or during - the process where a temporary table is converted over to being a - myisam table. - For sql_base.cc look at close_data_tables(). -*/ -int ha_example::close(void) -{ - DBUG_ENTER("ha_example::close"); - DBUG_RETURN(free_share(share)); -} - - -/* - write_row() inserts a row. No extra() hint is given currently if a bulk load - is happeneding. buf() is a byte array of data. 
You can use the field - information to extract the data from the native byte array type. - Example of this would be: - for (Field **field=table->field ; *field ; field++) - { - ... - } - - See ha_tina.cc for an example of extracting all of the data as strings. - ha_berekly.cc has an example of how to store it intact by "packing" it - for ha_berkeley's own native storage type. - - See the note for update_row() on auto_increments and timestamps. This - case also applied to write_row(). - - Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc, - sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc. -*/ -int ha_example::write_row(byte * buf) -{ - DBUG_ENTER("ha_example::write_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Yes, update_row() does what you expect, it updates a row. old_data will have - the previous row record in it, while new_data will have the newest data in - it. - Keep in mind that the server can do updates based on ordering if an ORDER BY - clause was used. Consecutive ordering is not guarenteed. - Currently new_data will not have an updated auto_increament record, or - and updated timestamp field. You can do these for example by doing these: - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - if (table->next_number_field && record == table->record[0]) - update_auto_increment(); - - Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc. -*/ -int ha_example::update_row(const byte * old_data, byte * new_data) -{ - - DBUG_ENTER("ha_example::update_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - This will delete a row. buf will contain a copy of the row to be deleted. - The server will call this right after the current row has been called (from - either a previous rnd_nexT() or index call). - If you keep a pointer to the last row or can access a primary key it will - make doing the deletion quite a bit easier. - Keep in mind that the server does no guarentee consecutive deletions. ORDER BY - clauses can be used. - - Called in sql_acl.cc and sql_udf.cc to manage internal table information. - Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is - used for removing duplicates while in insert it is used for REPLACE calls. -*/ -int ha_example::delete_row(const byte * buf) -{ - DBUG_ENTER("ha_example::delete_row"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Positions an index cursor to the index specified in the handle. Fetches the - row if available. If the key value is null, begin at the first key of the - index. -*/ -int ha_example::index_read(byte * buf, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_example::index_read"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Positions an index cursor to the index specified in key. Fetches the - row if any. This is only used to read whole keys. -*/ -int ha_example::index_read_idx(byte * buf, uint index, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_example::index_read_idx"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Used to read forward through the index. -*/ -int ha_example::index_next(byte * buf) -{ - DBUG_ENTER("ha_example::index_next"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - Used to read backwards through the index. 
-*/ -int ha_example::index_prev(byte * buf) -{ - DBUG_ENTER("ha_example::index_prev"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - index_first() asks for the first key in the index. - - Called from opt_range.cc, opt_sum.cc, sql_handler.cc, - and sql_select.cc. -*/ -int ha_example::index_first(byte * buf) -{ - DBUG_ENTER("ha_example::index_first"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - index_last() asks for the last key in the index. - - Called from opt_range.cc, opt_sum.cc, sql_handler.cc, - and sql_select.cc. -*/ -int ha_example::index_last(byte * buf) -{ - DBUG_ENTER("ha_example::index_last"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - rnd_init() is called when the system wants the storage engine to do a table - scan. - See the example in the introduction at the top of this file to see when - rnd_init() is called. - - Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc, - and sql_update.cc. -*/ -int ha_example::rnd_init(bool scan) -{ - DBUG_ENTER("ha_example::rnd_init"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - -int ha_example::rnd_end() -{ - DBUG_ENTER("ha_example::rnd_end"); - DBUG_RETURN(0); -} - -/* - This is called for each row of the table scan. When you run out of records - you should return HA_ERR_END_OF_FILE. Fill buff up with the row information. - The Field structure for the table is the key to getting data into buf - in a manner that will allow the server to understand it. - - Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc, - and sql_update.cc. -*/ -int ha_example::rnd_next(byte *buf) -{ - DBUG_ENTER("ha_example::rnd_next"); - DBUG_RETURN(HA_ERR_END_OF_FILE); -} - - -/* - position() is called after each call to rnd_next() if the data needs - to be ordered. You can do something like the following to store - the position: - my_store_ptr(ref, ref_length, current_position); - - The server uses ref to store data. ref_length in the above case is - the size needed to store current_position. ref is just a byte array - that the server will maintain. If you are using offsets to mark rows, then - current_position should be the offset. If it is a primary key like in - BDB, then it needs to be a primary key. - - Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc. -*/ -void ha_example::position(const byte *record) -{ - DBUG_ENTER("ha_example::position"); - DBUG_VOID_RETURN; -} - - -/* - This is like rnd_next, but you are given a position to use - to determine the row. The position will be of the type that you stored in - ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key - or position you saved when position() was called. - Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc. -*/ -int ha_example::rnd_pos(byte * buf, byte *pos) -{ - DBUG_ENTER("ha_example::rnd_pos"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - ::info() is used to return information to the optimizer. - see my_base.h for the complete description - - Currently this table handler doesn't implement most of the fields - really needed. SHOW also makes use of this data - Another note, you will probably want to have the following in your - code: - if (records < 2) - records = 2; - The reason is that the server will optimize for cases of only a single - record. If in a table scan you don't know the number of records - it will probably be better to set records to two so you can return - as many records as you need. 
- Along with records a few more variables you may wish to set are: - records - deleted - data_file_length - index_file_length - delete_length - check_time - Take a look at the public variables in handler.h for more information. - - Called in: - filesort.cc - ha_heap.cc - item_sum.cc - opt_sum.cc - sql_delete.cc - sql_delete.cc - sql_derived.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_select.cc - sql_show.cc - sql_show.cc - sql_show.cc - sql_show.cc - sql_table.cc - sql_union.cc - sql_update.cc - -*/ -void ha_example::info(uint flag) -{ - DBUG_ENTER("ha_example::info"); - DBUG_VOID_RETURN; -} - - -/* - extra() is called whenever the server wishes to send a hint to - the storage engine. The myisam engine implements the most hints. - ha_innodb.cc has the most exhaustive list of these hints. -*/ -int ha_example::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("ha_example::extra"); - DBUG_RETURN(0); -} - - -/* - Deprecated and likely to be removed in the future. Storage engines normally - just make a call like: - ha_example::extra(HA_EXTRA_RESET); - to handle it. -*/ -int ha_example::reset(void) -{ - DBUG_ENTER("ha_example::reset"); - DBUG_RETURN(0); -} - - -/* - Used to delete all rows in a table. Both for cases of truncate and - for cases where the optimizer realizes that all rows will be - removed as a result of a SQL statement. - - Called from item_sum.cc by Item_func_group_concat::clear(), - Item_sum_count_distinct::clear(), and Item_func_group_concat::clear(). - Called from sql_delete.cc by mysql_delete(). - Called from sql_select.cc by JOIN::reinit(). - Called from sql_union.cc by st_select_lex_unit::exec(). -*/ -int ha_example::delete_all_rows() -{ - DBUG_ENTER("ha_example::delete_all_rows"); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - - -/* - First you should go read the section "locking functions for mysql" in - lock.cc to understand this. - This create a lock on the table. If you are implementing a storage engine - that can handle transacations look at ha_berkely.cc to see how you will - want to goo about doing this. Otherwise you should consider calling flock() - here. - - Called from lock.cc by lock_external() and unlock_external(). Also called - from sql_table.cc by copy_data_between_tables(). -*/ -int ha_example::external_lock(THD *thd, int lock_type) -{ - DBUG_ENTER("ha_example::external_lock"); - DBUG_RETURN(0); -} - - -/* - The idea with handler::store_lock() is the following: - - The statement decided which locks we should need for the table - for updates/deletes/inserts we get WRITE locks, for SELECT... we get - read locks. - - Before adding the lock into the table lock handler (see thr_lock.c) - mysqld calls store lock with the requested locks. Store lock can now - modify a write lock to a read lock (or some other lock), ignore the - lock (if we don't want to use MySQL table locks at all) or add locks - for many tables (like we do when we are using a MERGE handler). - - Berkeley DB for example changes all WRITE locks to TL_WRITE_ALLOW_WRITE - (which signals that we are doing WRITES, but we are still allowing other - reader's and writer's. - - When releasing locks, store_lock() are also called. In this case one - usually doesn't have to do anything. - - In some exceptional cases MySQL may send a request for a TL_IGNORE; - This means that we are requesting the same lock as last time and this - should also be ignored. 
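
The lock conversion described for store_lock() can be modeled with a toy enum: TL_IGNORE keeps whatever lock the handler already holds, and an engine that does its own row locking downgrades TL_WRITE the way BDB does (a sketch only; the real code uses thr_lock_type from thr_lock.h):

    enum ToyLockType { TOY_TL_IGNORE, TOY_TL_READ, TOY_TL_WRITE,
                       TOY_TL_WRITE_ALLOW_WRITE, TOY_TL_UNLOCK };

    /* 'current' is what the handler's lock slot already holds. */
    ToyLockType convert_lock(ToyLockType requested, ToyLockType current)
    {
      if (requested == TOY_TL_IGNORE)         /* "same as last time": keep it */
        return current;
      if (requested == TOY_TL_WRITE)          /* engine does its own row locking, */
        return TOY_TL_WRITE_ALLOW_WRITE;      /* so other readers/writers may pass */
      return requested;                       /* reads pass through unchanged */
    }
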
(This may happen when someone does a flush - table when we have opened a part of the tables, in which case mysqld - closes and reopens the tables and tries to get the same locks at last - time). In the future we will probably try to remove this. - - Called from lock.cc by get_lock_data(). -*/ -THR_LOCK_DATA **ha_example::store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) - lock.type=lock_type; - *to++= &lock; - return to; -} - -/* - Used to delete a table. By the time delete_table() has been called all - opened references to this table will have been closed (and your globally - shared references released. The variable name will just be the name of - the table. You will need to remove any files you have created at this point. - - If you do not implement this, the default delete_table() is called from - handler.cc and it will delete all files with the file extentions returned - by bas_ext(). - - Called from handler.cc by delete_table and ha_create_table(). Only used - during create if the table_flag HA_DROP_BEFORE_CREATE was specified for - the storage engine. -*/ -int ha_example::delete_table(const char *name) -{ - DBUG_ENTER("ha_example::delete_table"); - /* This is not implemented but we want someone to be able that it works. */ - DBUG_RETURN(0); -} - -/* - Renames a table from one name to another from alter table call. - - If you do not implement this, the default rename_table() is called from - handler.cc and it will delete all files with the file extentions returned - by bas_ext(). - - Called from sql_table.cc by mysql_rename_table(). -*/ -int ha_example::rename_table(const char * from, const char * to) -{ - DBUG_ENTER("ha_example::rename_table "); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); -} - -/* - Given a starting key, and an ending key estimate the number of rows that - will exist between the two. end_key may be empty which in case determine - if start_key matches any rows. - - Called from opt_range.cc by check_quick_keys(). -*/ -ha_rows ha_example::records_in_range(uint inx, key_range *min_key, - key_range *max_key) -{ - DBUG_ENTER("ha_example::records_in_range"); - DBUG_RETURN(10); // low number to force index usage -} - - -/* - create() is called to create a database. The variable name will have the name - of the table. When create() is called you do not need to worry about opening - the table. Also, the FRM file will have already been created so adjusting - create_info will not do you any good. You can overwrite the frm file at this - point if you wish to change the table definition, but there are no methods - currently provided for doing that. - - Called from handle.cc by ha_create_table(). -*/ -int ha_example::create(const char *name, TABLE *table_arg, - HA_CREATE_INFO *create_info) -{ - DBUG_ENTER("ha_example::create"); - /* This is not implemented but we want someone to be able that it works. */ - DBUG_RETURN(0); -} -#endif /* HAVE_EXAMPLE_DB */ diff --git a/sql/examples/ha_example.h b/sql/examples/ha_example.h deleted file mode 100644 index 37f38fe5210..00000000000 --- a/sql/examples/ha_example.h +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - Please read ha_exmple.cc before reading this file. - Please keep in mind that the example storage engine implements all methods - that are required to be implemented. handler.h has a full list of methods - that you can implement. -*/ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -/* - EXAMPLE_SHARE is a structure that will be shared amoung all open handlers - The example implements the minimum of what you will probably need. -*/ -typedef struct st_example_share { - char *table_name; - uint table_name_length,use_count; - pthread_mutex_t mutex; - THR_LOCK lock; -} EXAMPLE_SHARE; - -/* - Class definition for the storage engine -*/ -class ha_example: public handler -{ - THR_LOCK_DATA lock; /* MySQL lock */ - EXAMPLE_SHARE *share; /* Shared lock info */ - -public: - ha_example(TABLE *table_arg); - ~ha_example() - { - } - /* The name that will be used for display purposes */ - const char *table_type() const { return "EXAMPLE"; } - /* - The name of the index type that will be used for display - don't implement this method unless you really have indexes - */ - const char *index_type(uint inx) { return "HASH"; } - const char **bas_ext() const; - /* - This is a list of flags that says what the storage engine - implements. The current table flags are documented in - handler.h - */ - ulong table_flags() const - { - return 0; - } - /* - This is a bitmap of flags that says how the storage engine - implements indexes. The current index flags are documented in - handler.h. If you do not implement indexes, just return zero - here. - - part is the key part to check. First key part is 0 - If all_parts it's set, MySQL want to know the flags for the combined - index up to and including 'part'. - */ - ulong index_flags(uint inx, uint part, bool all_parts) const - { - return 0; - } - /* - unireg.cc will call the following to make sure that the storage engine can - handle the data it is about to send. - - Return *real* limits of your storage engine here. MySQL will do - min(your_limits, MySQL_limits) automatically - - There is no need to implement ..._key_... methods if you don't suport - indexes. - */ - uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } - uint max_supported_keys() const { return 0; } - uint max_supported_key_parts() const { return 0; } - uint max_supported_key_length() const { return 0; } - /* - Called in test_quick_select to determine if indexes should be used. - */ - virtual double scan_time() { return (double) (records+deleted) / 20.0+10; } - /* - The next method will never be called if you do not implement indexes. - */ - virtual double read_time(ha_rows rows) { return (double) rows / 20.0+1; } - - /* - Everything below are methods that we implment in ha_example.cc. 
- - Most of these methods are not obligatory, skip them and - MySQL will treat them as not implemented - */ - int open(const char *name, int mode, uint test_if_locked); // required - int close(void); // required - - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint idx, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - /* - unlike index_init(), rnd_init() can be called two times - without rnd_end() in between (it only makes sense if scan=1). - then the second call should prepare for the new table scan - (e.g if rnd_init allocates the cursor, second call should - position it to the start of the table, no need to deallocate - and allocate it again - */ - int rnd_init(bool scan); //required - int rnd_end(); - int rnd_next(byte *buf); //required - int rnd_pos(byte * buf, byte *pos); //required - void position(const byte *record); //required - void info(uint); //required - - int extra(enum ha_extra_function operation); - int reset(void); - int external_lock(THD *thd, int lock_type); //required - int delete_all_rows(void); - ha_rows records_in_range(uint inx, key_range *min_key, - key_range *max_key); - int delete_table(const char *from); - int rename_table(const char * from, const char * to); - int create(const char *name, TABLE *form, - HA_CREATE_INFO *create_info); //required - - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); //required -}; diff --git a/sql/examples/ha_tina.cc b/sql/examples/ha_tina.cc deleted file mode 100644 index 8ae82f97d0b..00000000000 --- a/sql/examples/ha_tina.cc +++ /dev/null @@ -1,893 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - Make sure to look at ha_tina.h for more details. - - First off, this is a play thing for me, there are a number of things wrong with it: - *) It was designed for csv and therefor its performance is highly questionable. - *) Indexes have not been implemented. This is because the files can be traded in - and out of the table directory without having to worry about rebuilding anything. - *) NULLs and "" are treated equally (like a spreadsheet). - *) There was in the beginning no point to anyone seeing this other then me, so there - is a good chance that I haven't quite documented it well. - *) Less design, more "make it work" - - Now there are a few cool things with it: - *) Errors can result in corrupted data files. - *) Data files can be read by spreadsheets directly. 
- -TODO: - *) Move to a block system for larger files - *) Error recovery, its all there, just need to finish it - *) Document how the chains work. - - -Brian -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "mysql_priv.h" - -#ifdef HAVE_CSV_DB - -#include "ha_tina.h" -#include <sys/mman.h> - -/* Stuff for shares */ -pthread_mutex_t tina_mutex; -static HASH tina_open_tables; -static int tina_init= 0; - -handlerton tina_hton= { - "CSV", - SHOW_OPTION_YES, - "CSV storage engine", - DB_TYPE_CSV_DB, - NULL, /* One needs to be written! */ - 0, /* slot */ - 0, /* savepoint size. */ - NULL, /* close_connection */ - NULL, /* savepoint */ - NULL, /* rollback to savepoint */ - NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ - NULL, /* prepare */ - NULL, /* recover */ - NULL, /* commit_by_xid */ - NULL, /* rollback_by_xid */ - NULL, /* create_cursor_read_view */ - NULL, /* set_cursor_read_view */ - NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE -}; - -/***************************************************************************** - ** TINA tables - *****************************************************************************/ - -/* - Used for sorting chains with qsort(). -*/ -int sort_set (tina_set *a, tina_set *b) -{ - /* - We assume that intervals do not intersect. So, it is enought to compare - any two points. Here we take start of intervals for comparison. - */ - return ( a->begin > b->begin ? -1 : ( a->begin < b->begin ? 1 : 0 ) ); -} - -static byte* tina_get_key(TINA_SHARE *share,uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - return (byte*) share->table_name; -} - -/* - Reloads the mmap file. -*/ -int get_mmap(TINA_SHARE *share, int write) -{ - DBUG_ENTER("ha_tina::get_mmap"); - if (share->mapped_file && munmap(share->mapped_file, share->file_stat.st_size)) - DBUG_RETURN(1); - - if (my_fstat(share->data_file, &share->file_stat, MYF(MY_WME)) == -1) - DBUG_RETURN(1); - - if (share->file_stat.st_size) - { - if (write) - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ|PROT_WRITE, MAP_SHARED, - share->data_file, 0); - else - share->mapped_file= (byte *)mmap(NULL, share->file_stat.st_size, - PROT_READ, MAP_PRIVATE, - share->data_file, 0); - if ((share->mapped_file ==(caddr_t)-1)) - { - /* - Bad idea you think? See the problem is that nothing actually checks - the return value of ::rnd_init(), so tossing an error is about - it for us. - Never going to happen right? :) - */ - my_message(errno, "Woops, blew up opening a mapped file", 0); - DBUG_ASSERT(0); - DBUG_RETURN(1); - } - } - else - share->mapped_file= NULL; - - DBUG_RETURN(0); -} - -/* - Simple lock controls. 
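
get_mmap()'s job is: unmap the old view, re-stat the file, map the current size. A standalone POSIX version of that remap step (read-only case; returning NULL both for an empty file and for failure, mirroring the mapped_file == NULL convention above):

    #include <cstddef>
    #include <sys/mman.h>
    #include <sys/stat.h>

    void *remap_file(int fd, void *old_map, size_t old_size, size_t *new_size)
    {
      struct stat st;

      if (old_map && munmap(old_map, old_size) == -1)
        return NULL;                     /* could not drop the stale view */
      if (fstat(fd, &st) == -1)
        return NULL;
      *new_size= (size_t) st.st_size;
      if (*new_size == 0)
        return NULL;                     /* empty file: nothing to map yet */
      void *map= mmap(NULL, *new_size, PROT_READ, MAP_PRIVATE, fd, 0);
      return map == MAP_FAILED ? NULL : map;
    }
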
-*/ -static TINA_SHARE *get_share(const char *table_name, TABLE *table) -{ - TINA_SHARE *share; - char *tmp_name; - uint length; - - if (!tina_init) - { - /* Hijack a mutex for init'ing the storage engine */ - pthread_mutex_lock(&LOCK_mysql_create_db); - if (!tina_init) - { - tina_init++; - VOID(pthread_mutex_init(&tina_mutex,MY_MUTEX_INIT_FAST)); - (void) hash_init(&tina_open_tables,system_charset_info,32,0,0, - (hash_get_key) tina_get_key,0,0); - } - pthread_mutex_unlock(&LOCK_mysql_create_db); - } - pthread_mutex_lock(&tina_mutex); - length=(uint) strlen(table_name); - if (!(share=(TINA_SHARE*) hash_search(&tina_open_tables, - (byte*) table_name, - length))) - { - char data_file_name[FN_REFLEN]; - if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), - &share, sizeof(*share), - &tmp_name, length+1, - NullS)) - { - pthread_mutex_unlock(&tina_mutex); - return NULL; - } - - share->use_count=0; - share->table_name_length=length; - share->table_name=tmp_name; - strmov(share->table_name,table_name); - fn_format(data_file_name, table_name, "", ".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME); - if (my_hash_insert(&tina_open_tables, (byte*) share)) - goto error; - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - - if ((share->data_file= my_open(data_file_name, O_RDWR|O_APPEND, - MYF(0))) == -1) - goto error2; - - /* We only use share->data_file for writing, so we scan to the end to append */ - if (my_seek(share->data_file, 0, SEEK_END, MYF(0)) == MY_FILEPOS_ERROR) - goto error2; - - share->mapped_file= NULL; // We don't know the state since we just allocated it - if (get_mmap(share, 0) > 0) - goto error3; - } - share->use_count++; - pthread_mutex_unlock(&tina_mutex); - - return share; - -error3: - my_close(share->data_file,MYF(0)); -error2: - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); -error: - pthread_mutex_unlock(&tina_mutex); - my_free((gptr) share, MYF(0)); - - return NULL; -} - - -/* - Free lock controls. -*/ -static int free_share(TINA_SHARE *share) -{ - DBUG_ENTER("ha_tina::free_share"); - pthread_mutex_lock(&tina_mutex); - int result_code= 0; - if (!--share->use_count){ - /* Drop the mapped file */ - if (share->mapped_file) - munmap(share->mapped_file, share->file_stat.st_size); - result_code= my_close(share->data_file,MYF(0)); - hash_delete(&tina_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); - } - pthread_mutex_unlock(&tina_mutex); - - DBUG_RETURN(result_code); -} - -bool tina_end() -{ - if (tina_init) - { - hash_free(&tina_open_tables); - VOID(pthread_mutex_destroy(&tina_mutex)); - } - tina_init= 0; - return FALSE; -} - -/* - Finds the end of a line. - Currently only supports files written on a UNIX OS. -*/ -byte * find_eoln(byte *data, off_t begin, off_t end) -{ - for (off_t x= begin; x < end; x++) - if (data[x] == '\n') - return data + x; - - return 0; -} - - -ha_tina::ha_tina(TABLE *table_arg) - :handler(&tina_hton, table_arg), - /* - These definitions are found in hanler.h - These are not probably completely right. - */ - current_position(0), next_position(0), chain_alloced(0), - chain_size(DEFAULT_CHAIN_LENGTH), records_is_known(0) -{ - /* Set our original buffers from pre-allocated memory */ - buffer.set(byte_buffer, IO_SIZE, system_charset_info); - chain= chain_buffer; -} - -/* - Encode a buffer into the quoted format. 
-*/ -int ha_tina::encode_quote(byte *buf) -{ - char attribute_buffer[1024]; - String attribute(attribute_buffer, sizeof(attribute_buffer), &my_charset_bin); - - buffer.length(0); - for (Field **field=table->field ; *field ; field++) - { - const char *ptr; - const char *end_ptr; - - (*field)->val_str(&attribute,&attribute); - ptr= attribute.ptr(); - end_ptr= attribute.length() + ptr; - - buffer.append('"'); - - while (ptr < end_ptr) - { - if (*ptr == '"') - { - buffer.append('\\'); - buffer.append('"'); - *ptr++; - } - else if (*ptr == '\r') - { - buffer.append('\\'); - buffer.append('r'); - *ptr++; - } - else if (*ptr == '\\') - { - buffer.append('\\'); - buffer.append('\\'); - *ptr++; - } - else if (*ptr == '\n') - { - buffer.append('\\'); - buffer.append('n'); - *ptr++; - } - else - buffer.append(*ptr++); - } - buffer.append('"'); - buffer.append(','); - } - // Remove the comma, add a line feed - buffer.length(buffer.length() - 1); - buffer.append('\n'); - //buffer.replace(buffer.length(), 0, "\n", 1); - - return (buffer.length()); -} - -/* - chain_append() adds delete positions to the chain that we use to keep track of space. -*/ -int ha_tina::chain_append() -{ - if ( chain_ptr != chain && (chain_ptr -1)->end == current_position) - (chain_ptr -1)->end= next_position; - else - { - /* We set up for the next position */ - if ((off_t)(chain_ptr - chain) == (chain_size -1)) - { - off_t location= chain_ptr - chain; - chain_size += DEFAULT_CHAIN_LENGTH; - if (chain_alloced) - { - /* Must cast since my_malloc unlike malloc doesn't have a void ptr */ - if ((chain= (tina_set *)my_realloc((gptr)chain,chain_size,MYF(MY_WME))) == NULL) - return -1; - } - else - { - tina_set *ptr= (tina_set *)my_malloc(chain_size * sizeof(tina_set),MYF(MY_WME)); - memcpy(ptr, chain, DEFAULT_CHAIN_LENGTH * sizeof(tina_set)); - chain= ptr; - chain_alloced++; - } - chain_ptr= chain + location; - } - chain_ptr->begin= current_position; - chain_ptr->end= next_position; - chain_ptr++; - } - - return 0; -} - - -/* - Scans for a row. -*/ -int ha_tina::find_current_row(byte *buf) -{ - byte *mapped_ptr= (byte *)share->mapped_file + current_position; - byte *end_ptr; - DBUG_ENTER("ha_tina::find_current_row"); - - /* EOF should be counted as new line */ - if ((end_ptr= find_eoln(share->mapped_file, current_position, share->file_stat.st_size)) == 0) - DBUG_RETURN(HA_ERR_END_OF_FILE); - - for (Field **field=table->field ; *field ; field++) - { - buffer.length(0); - mapped_ptr++; // Increment past the first quote - for(;mapped_ptr != end_ptr; mapped_ptr++) - { - //Need to convert line feeds! - if (*mapped_ptr == '"' && - (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || (mapped_ptr == end_ptr -1 ))) - { - mapped_ptr += 2; // Move past the , and the " - break; - } - if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) - { - mapped_ptr++; - if (*mapped_ptr == 'r') - buffer.append('\r'); - else if (*mapped_ptr == 'n' ) - buffer.append('\n'); - else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"')) - buffer.append(*mapped_ptr); - else /* This could only happed with an externally created file */ - { - buffer.append('\\'); - buffer.append(*mapped_ptr); - } - } - else - buffer.append(*mapped_ptr); - } - (*field)->store(buffer.ptr(), buffer.length(), system_charset_info); - } - next_position= (end_ptr - share->mapped_file)+1; - /* Maybe use \N for null? */ - memset(buf, 0, table->s->null_bytes); /* We do not implement nulls! 
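
encode_quote()'s escaping rules for one value, restated over std::string: wrap the value in double quotes and backslash-escape the quote, backslash, CR and LF (a standalone sketch, not the handler code):

    #include <string>

    std::string csv_quote(const std::string &value)
    {
      std::string out("\"");
      for (std::string::size_type i= 0; i < value.size(); i++)
      {
        switch (value[i]) {
        case '"':  out+= "\\\""; break;
        case '\\': out+= "\\\\"; break;
        case '\r': out+= "\\r";  break;
        case '\n': out+= "\\n";  break;
        default:   out+= value[i];
        }
      }
      out+= '"';
      return out;
    }
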
*/ - - DBUG_RETURN(0); -} - -/* - If frm_error() is called in table.cc this is called to find out what file - extensions exist for this handler. -*/ -static const char *ha_tina_exts[] = { - ".CSV", - NullS -}; - -const char **ha_tina::bas_ext() const -{ - return ha_tina_exts; -} - - -/* - Open a database file. Keep in mind that tables are caches, so - this will not be called for every request. Any sort of positions - that need to be reset should be kept in the ::extra() call. -*/ -int ha_tina::open(const char *name, int mode, uint test_if_locked) -{ - DBUG_ENTER("ha_tina::open"); - - if (!(share= get_share(name, table))) - DBUG_RETURN(1); - thr_lock_data_init(&share->lock,&lock,NULL); - ref_length=sizeof(off_t); - - DBUG_RETURN(0); -} - - -/* - Close a database file. We remove ourselves from the shared strucutre. - If it is empty we destroy it and free the mapped file. -*/ -int ha_tina::close(void) -{ - DBUG_ENTER("ha_tina::close"); - DBUG_RETURN(free_share(share)); -} - -/* - This is an INSERT. At the moment this handler just seeks to the end - of the file and appends the data. In an error case it really should - just truncate to the original position (this is not done yet). -*/ -int ha_tina::write_row(byte * buf) -{ - int size; - DBUG_ENTER("ha_tina::write_row"); - - statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) - table->timestamp_field->set_time(); - - size= encode_quote(buf); - - if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP))) - DBUG_RETURN(-1); - - /* - Ok, this is means that we will be doing potentially bad things - during a bulk insert on some OS'es. What we need is a cleanup - call for ::write_row that would let us fix up everything after the bulk - insert. The archive handler does this with an extra mutx call, which - might be a solution for this. - */ - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - records++; - DBUG_RETURN(0); -} - - -/* - This is called for an update. - Make sure you put in code to increment the auto increment, also - update any timestamp data. Currently auto increment is not being - fixed since autoincrements have yet to be added to this table handler. - This will be called in a table scan right before the previous ::rnd_next() - call. -*/ -int ha_tina::update_row(const byte * old_data, byte * new_data) -{ - int size; - DBUG_ENTER("ha_tina::update_row"); - - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - - size= encode_quote(new_data); - - if (chain_append()) - DBUG_RETURN(-1); - - if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP))) - DBUG_RETURN(-1); - DBUG_RETURN(0); -} - - -/* - Deletes a row. First the database will find the row, and then call this method. - In the case of a table scan, the previous call to this will be the ::rnd_next() - that found this row. - The exception to this is an ORDER BY. This will cause the table handler to walk - the table noting the positions of all rows that match a query. The table will - then be deleted/positioned based on the ORDER (so RANDOM, DESC, ASC). 
- - -/* - Deletes a row. First the database will find the row, and then call this method. - In the case of a table scan, the previous call to this will be the ::rnd_next() - that found this row. - The exception to this is an ORDER BY. This will cause the table handler to walk - the table noting the positions of all rows that match a query. The table will - then be deleted/positioned based on the ORDER (so RANDOM, DESC, ASC). -*/ -int ha_tina::delete_row(const byte * buf) -{ - DBUG_ENTER("ha_tina::delete_row"); - statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); - - if (chain_append()) - DBUG_RETURN(-1); - - --records; - - DBUG_RETURN(0); -} - -/* - Fill buf with the value from key. Simply put, this is used for a single index read - with a key. -*/ -int ha_tina::index_read(byte * buf, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_tina::index_read"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Fill buf with the value from key. Simply put, this is used for a single index read - with a key. - Whatever the current key is we will use it. This is what will be in "index". -*/ -int ha_tina::index_read_idx(byte * buf, uint index, const byte * key, - uint key_len __attribute__((unused)), - enum ha_rkey_function find_flag - __attribute__((unused))) -{ - DBUG_ENTER("ha_tina::index_read_idx"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - - -/* - Read the next position in the index. -*/ -int ha_tina::index_next(byte * buf) -{ - DBUG_ENTER("ha_tina::index_next"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the previous position in the index. -*/ -int ha_tina::index_prev(byte * buf) -{ - DBUG_ENTER("ha_tina::index_prev"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the first position in the index. -*/ -int ha_tina::index_first(byte * buf) -{ - DBUG_ENTER("ha_tina::index_first"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - Read the last position in the index. - With this we don't need to do a filesort() with the index. - We just read the last row and call previous. -*/ -int ha_tina::index_last(byte * buf) -{ - DBUG_ENTER("ha_tina::index_last"); - DBUG_ASSERT(0); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); -} - -/* - All table scans call this first. - The order of a table scan is: - - ha_tina::store_lock - ha_tina::external_lock - ha_tina::info - ha_tina::rnd_init - ha_tina::extra - ENUM HA_EXTRA_CACHE Cache record in HA_rrnd() - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::rnd_next - ha_tina::extra - ENUM HA_EXTRA_NO_CACHE End caching of records (def) - ha_tina::external_lock - ha_tina::extra - ENUM HA_EXTRA_RESET Reset database to after open - - Each call to ::rnd_next() represents a row returned in the scan. When no more - rows can be returned, rnd_next() returns a value of HA_ERR_END_OF_FILE. - The ::info() call is just for the optimizer. - -*/ - -int ha_tina::rnd_init(bool scan) -{ - DBUG_ENTER("ha_tina::rnd_init"); - - current_position= next_position= 0; - records= 0; - records_is_known= 0; - chain_ptr= chain; -#ifdef HAVE_MADVISE - if (scan) - (void)madvise(share->mapped_file,share->file_stat.st_size,MADV_SEQUENTIAL); -#endif - - DBUG_RETURN(0); -}
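rnd_init() above shows the one scan-time optimization the handler makes: when a full scan starts, it tells the kernel via madvise() that the mapped data file will be read front to back. A minimal sketch of the mmap + MADV_SEQUENTIAL pattern, assuming POSIX mmap and the same HAVE_MADVISE guard as the source (map_for_scan is an illustrative helper, not a server function):

#include <sys/mman.h>
#include <sys/stat.h>
#include <stddef.h>

// Map an open data file read-only and hint sequential access -- roughly
// what get_mmap() plus the madvise() call in rnd_init() add up to.
static void *map_for_scan(int fd, size_t *len)
{
  struct stat st;
  if (fstat(fd, &st) == -1 || st.st_size == 0)
    return NULL;                          // nothing to map
  void *base = mmap(NULL, (size_t) st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  if (base == MAP_FAILED)
    return NULL;
#ifdef HAVE_MADVISE
  // Pages will be touched front to back during the table scan.
  (void) madvise(base, (size_t) st.st_size, MADV_SEQUENTIAL);
#endif
  *len = (size_t) st.st_size;
  return base;
}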
- -/* - ::rnd_next() does all the heavy lifting for a table scan. You will need to populate *buf - with the correct field data. You can walk the fields to determine at what position you - should store the data (take a look at how ::find_current_row() works). The structure - is something like: - 0Foo Dog Friend - The first offset is for the first attribute. All space before that is reserved for the null count. - Basically this works as a mask for which fields are nulled (compared to just empty). - This table handler doesn't do nulls and does not know the difference between NULL and "". This - is ok since this table handler is for spreadsheets and they don't know about them either :) -*/ -int ha_tina::rnd_next(byte *buf) -{ - DBUG_ENTER("ha_tina::rnd_next"); - - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - - current_position= next_position; - if (!share->mapped_file) - DBUG_RETURN(HA_ERR_END_OF_FILE); - if (HA_ERR_END_OF_FILE == find_current_row(buf) ) - DBUG_RETURN(HA_ERR_END_OF_FILE); - - records++; - DBUG_RETURN(0); -} - -/* - In the case of an ORDER BY, rows will need to be sorted. - ::position() is called after each call to ::rnd_next(); - the data it stores is written to a byte array. You can store this - data via my_store_ptr(). ref_length is a variable defined in the - class that is the sizeof() of the position being stored. In our case - it's just a position. Look at the bdb code if you want to see a case - where something other than a number is stored. -*/ -void ha_tina::position(const byte *record) -{ - DBUG_ENTER("ha_tina::position"); - my_store_ptr(ref, ref_length, current_position); - DBUG_VOID_RETURN; -} - - -/* - Used to fetch a row from a position stored with ::position(). - my_get_ptr() retrieves the data for you. -*/ - -int ha_tina::rnd_pos(byte * buf, byte *pos) -{ - DBUG_ENTER("ha_tina::rnd_pos"); - statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, - &LOCK_status); - current_position= my_get_ptr(pos,ref_length); - DBUG_RETURN(find_current_row(buf)); -} - -/* - ::info() is used to return information to the optimizer. - Currently this table handler doesn't implement most of the fields - really needed. SHOW also makes use of this data. -*/ -void ha_tina::info(uint flag) -{ - DBUG_ENTER("ha_tina::info"); - /* This is a lie, but you don't want the optimizer to see zero or 1 */ - if (!records_is_known && records < 2) - records= 2; - DBUG_VOID_RETURN; -} - -/* - Grab bag of flags that are sent to the table handler every so often. - HA_EXTRA_RESET and HA_EXTRA_RESET_STATE are the most frequently called. - You are not required to implement any of these. -*/ -int ha_tina::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("ha_tina::extra"); - DBUG_RETURN(0); -} - -/* - This is no longer used. -*/ -int ha_tina::reset(void) -{ - DBUG_ENTER("ha_tina::reset"); - ha_tina::extra(HA_EXTRA_RESET); - DBUG_RETURN(0); -}
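rnd_end(), next, is where the ranges collected by chain_append() pay off: the chain is sorted and the live bytes between dead ranges are slid toward the start of the mapped file before the file is truncated. A simplified, correct-by-construction sketch of that compaction over a sorted, disjoint range list (illustrative names, not a copy of the server's loop):

#include <string.h>
#include <stddef.h>

struct dead_range { size_t begin, end; };  // begin < end; sorted, disjoint

// Remove every [begin, end) range from an in-memory image by sliding the
// live bytes left; returns the new length the file should be truncated to.
static size_t compact(char *buf, size_t len,
                      const struct dead_range *ranges, size_t n)
{
  size_t write_pos = 0, read_pos = 0;
  for (size_t i = 0; i < n; i++)
  {
    size_t live = ranges[i].begin - read_pos;        // bytes before this hole
    memmove(buf + write_pos, buf + read_pos, live);  // overlapping-safe copy
    write_pos += live;
    read_pos = ranges[i].end;                        // skip the hole
  }
  memmove(buf + write_pos, buf + read_pos, len - read_pos);  // trailing bytes
  return write_pos + (len - read_pos);
}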
- - -/* - Called after deletes, inserts, and updates. This is where we clean up all of - the dead space we have collected while writing the file. -*/ -int ha_tina::rnd_end() -{ - DBUG_ENTER("ha_tina::rnd_end"); - - records_is_known= 1; - - /* First position will be truncate position, second will be increment */ - if ((chain_ptr - chain) > 0) - { - tina_set *ptr; - off_t length; - - /* - Setting up a writable map; after the get_mmap call this will contain - all of the data we have added to the file. - */ - if (get_mmap(share, 1) > 0) - DBUG_RETURN(-1); - length= share->file_stat.st_size; - - /* - The sort handles updates/deletes in random order. - It also sorts so that we move the final blocks to the - beginning so that we move the smallest amount of data possible. - */ - qsort(chain, (size_t)(chain_ptr - chain), sizeof(tina_set), (qsort_cmp)sort_set); - for (ptr= chain; ptr < chain_ptr; ptr++) - { - memmove(share->mapped_file + ptr->begin, share->mapped_file + ptr->end, - length - (size_t)ptr->end); - length= length - (size_t)(ptr->end - ptr->begin); - } - - /* Truncate the file to the new size */ - if (my_chsize(share->data_file, length, 0, MYF(MY_WME))) - DBUG_RETURN(-1); - - if (munmap(share->mapped_file, length)) - DBUG_RETURN(-1); - - /* We set it to null so that get_mmap() won't try to unmap it */ - share->mapped_file= NULL; - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - } - - DBUG_RETURN(0); -} - -/* - A DELETE without a WHERE clause calls this. -*/ -int ha_tina::delete_all_rows() -{ - DBUG_ENTER("ha_tina::delete_all_rows"); - - if (!records_is_known) - return (my_errno=HA_ERR_WRONG_COMMAND); - - int rc= my_chsize(share->data_file, 0, 0, MYF(MY_WME)); - - if (get_mmap(share, 0) > 0) - DBUG_RETURN(-1); - - records=0; - DBUG_RETURN(rc); -} - -/* - Always called at the start of a transaction (or by "lock tables"). -*/ -int ha_tina::external_lock(THD *thd, int lock_type) -{ - DBUG_ENTER("ha_tina::external_lock"); - DBUG_RETURN(0); // No external locking -} - -/* - Called by the database to lock the table. Keep in mind that this - is an internal lock. -*/ -THR_LOCK_DATA **ha_tina::store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) - lock.type=lock_type; - *to++= &lock; - return to; -} - -/* - Create a table. You do not want to leave the table open after a call to - this (the database will call ::open() if it needs to). -*/ - -int ha_tina::create(const char *name, TABLE *table_arg, HA_CREATE_INFO *create_info) -{ - char name_buff[FN_REFLEN]; - File create_file; - DBUG_ENTER("ha_tina::create"); - - if ((create_file= my_create(fn_format(name_buff,name,"",".CSV",MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, - O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) - DBUG_RETURN(-1); - - my_close(create_file,MYF(0)); - - DBUG_RETURN(0); -} - -#endif /* enable CSV */ diff --git a/sql/examples/ha_tina.h b/sql/examples/ha_tina.h deleted file mode 100644 index 97659f99dd9..00000000000 --- a/sql/examples/ha_tina.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (C) 2003 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details.
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <my_dir.h> - -#define DEFAULT_CHAIN_LENGTH 512 - -typedef struct st_tina_share { - char *table_name; - byte *mapped_file; /* mapped region of file */ - uint table_name_length,use_count; - MY_STAT file_stat; /* Stat information for the data file */ - File data_file; /* Current open data file */ - pthread_mutex_t mutex; - THR_LOCK lock; -} TINA_SHARE; - -typedef struct tina_set { - off_t begin; - off_t end; -}; - -class ha_tina: public handler -{ - THR_LOCK_DATA lock; /* MySQL lock */ - TINA_SHARE *share; /* Shared lock info */ - off_t current_position; /* Current position in the file during a file scan */ - off_t next_position; /* Next position in the file scan */ - byte byte_buffer[IO_SIZE]; - String buffer; - tina_set chain_buffer[DEFAULT_CHAIN_LENGTH]; - tina_set *chain; - tina_set *chain_ptr; - byte chain_alloced; - uint32 chain_size; - bool records_is_known; - -public: - ha_tina(TABLE *table_arg); - ~ha_tina() - { - if (chain_alloced) - my_free((gptr)chain,0); - } - const char *table_type() const { return "CSV"; } - const char *index_type(uint inx) { return "NONE"; } - const char **bas_ext() const; - ulong table_flags() const - { - return (HA_REC_NOT_IN_SEQ | HA_NOT_EXACT_COUNT | - HA_NO_AUTO_INCREMENT ); - } - ulong index_flags(uint idx, uint part, bool all_parts) const - { - /* We will never have indexes so this will never be called(AKA we return zero) */ - return 0; - } - uint max_record_length() const { return HA_MAX_REC_LENGTH; } - uint max_keys() const { return 0; } - uint max_key_parts() const { return 0; } - uint max_key_length() const { return 0; } - /* - Called in test_quick_select to determine if indexes should be used. - */ - virtual double scan_time() { return (double) (records+deleted) / 20.0+10; } - /* The next method will never be called */ - virtual bool fast_key_read() { return 1;} - /* - TODO: return actual upper bound of number of records in the table. - (e.g. 
save number of records seen on full table scan and/or use file size - as upper bound) - */ - ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; } - - int open(const char *name, int mode, uint test_if_locked); - int close(void); - int write_row(byte * buf); - int update_row(const byte * old_data, byte * new_data); - int delete_row(const byte * buf); - int index_read(byte * buf, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(byte * buf, uint idx, const byte * key, - uint key_len, enum ha_rkey_function find_flag); - int index_next(byte * buf); - int index_prev(byte * buf); - int index_first(byte * buf); - int index_last(byte * buf); - int rnd_init(bool scan=1); - int rnd_next(byte *buf); - int rnd_pos(byte * buf, byte *pos); - int rnd_end(); - void position(const byte *record); - void info(uint); - int extra(enum ha_extra_function operation); - int reset(void); - int external_lock(THD *thd, int lock_type); - int delete_all_rows(void); - int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); - - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - - /* The following methods were added just for TINA */ - int encode_quote(byte *buf); - int find_current_row(byte *buf); - int chain_append(); -}; - -bool tina_end(); - diff --git a/sql/field.cc b/sql/field.cc index eab62cd1958..65c0d1b9397 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -67,6 +67,7 @@ inline int field_type2index (enum_field_types field_type) ((int)FIELDTYPE_TEAR_FROM) + (field_type - FIELDTYPE_TEAR_TO) - 1); } + static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= { /* MYSQL_TYPE_DECIMAL -> */ @@ -1023,10 +1024,9 @@ bool Field::type_can_have_key_part(enum enum_field_types type) Field_num::Field_num(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg) :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg), + unireg_check_arg, field_name_arg), dec(dec_arg),zerofill(zero_arg),unsigned_flag(unsigned_arg) { if (zerofill) @@ -1215,16 +1215,11 @@ String *Field::val_int_as_str(String *val_buffer, my_bool unsigned_val) } -/* This is used as a table name when the table structure is not set up */ -const char *unknown_table_name= 0; - Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, uchar null_bit_arg, - utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + utype unireg_check_arg, const char *field_name_arg) :ptr(ptr_arg),null_ptr(null_ptr_arg), - table(table_arg),orig_table(table_arg), - table_name(table_arg ? &table_arg->alias : &unknown_table_name), + table(0), orig_table(0), table_name(0), field_name(field_name_arg), query_id(0), key_start(0), part_of_key(0), part_of_sortkey(0), unireg_check(unireg_check_arg), @@ -1233,6 +1228,7 @@ Field::Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, flags=null_ptr ? 
0: NOT_NULL_FLAG; comment.str= (char*) ""; comment.length=0; + fieldnr= 0; } uint Field::offset() @@ -1280,10 +1276,10 @@ void Field_num::add_zerofill_and_unsigned(String &res) const void Field::make_field(Send_field *field) { - if (orig_table->s->table_cache_key && *(orig_table->s->table_cache_key)) + if (orig_table->s->db.str && *orig_table->s->db.str) { - field->org_table_name= orig_table->s->table_name; - field->db_name= orig_table->s->table_cache_key; + field->db_name= orig_table->s->db.str; + field->org_table_name= orig_table->s->table_name.str; } else field->org_table_name= field->db_name= ""; @@ -1391,10 +1387,9 @@ my_decimal* Field_num::val_decimal(my_decimal *decimal_value) Field_str::Field_str(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg,CHARSET_INFO *charset) + const char *field_name_arg, CHARSET_INFO *charset) :Field(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg) + unireg_check_arg, field_name_arg) { field_charset=charset; if (charset->state & MY_CS_BINSORT) @@ -1527,7 +1522,7 @@ Field *Field::new_field(MEM_ROOT *root, struct st_table *new_table) tmp->key_start.init(0); tmp->part_of_key.init(0); tmp->part_of_sortkey.init(0); - tmp->unireg_check=Field::NONE; + tmp->unireg_check= Field::NONE; tmp->flags&= (NOT_NULL_FLAG | BLOB_FLAG | UNSIGNED_FLAG | ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG); tmp->reset_fields(); @@ -1648,6 +1643,21 @@ bool Field::needs_quotes(void) } +/* This is used to generate a field in TABLE from TABLE_SHARE */ + +Field *Field::clone(MEM_ROOT *root, struct st_table *new_table) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + tmp->init(new_table); + tmp->move_field_offset((my_ptrdiff_t) (new_table->record[0] - + new_table->s->default_values)); + } + return tmp; +} + + /**************************************************************************** Field_null, a field that always return NULL ****************************************************************************/ @@ -2277,13 +2287,10 @@ Field_new_decimal::Field_new_decimal(char *ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg, bool unsigned_arg) - :Field_num(ptr_arg, len_arg, - null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, - dec_arg, zero_arg, unsigned_arg) + :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) { precision= my_decimal_length_to_precision(len_arg, dec_arg, unsigned_arg); DBUG_ASSERT((precision <= DECIMAL_MAX_PRECISION) && @@ -2295,14 +2302,11 @@ Field_new_decimal::Field_new_decimal(char *ptr_arg, Field_new_decimal::Field_new_decimal(uint32 len_arg, bool maybe_null, const char *name, - struct st_table *t_arg, uint8 dec_arg, bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null ? 
(uchar*) "": 0, 0, - NONE, name, t_arg, - dec_arg, - 0, unsigned_arg) + NONE, name, dec_arg, 0, unsigned_arg) { precision= my_decimal_length_to_precision(len_arg, dec_arg, unsigned_arg); DBUG_ASSERT((precision <= DECIMAL_MAX_PRECISION) && @@ -2381,7 +2385,7 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value) #ifndef DBUG_OFF { char dbug_buff[DECIMAL_MAX_STR_LENGTH+1]; - DBUG_PRINT("info", ("saving with precision %d, scale: %d, value %s", + DBUG_PRINT("info", ("saving with precision %d scale: %d value %s", (int)precision, (int)dec, dbug_decimal_as_string(dbug_buff, decimal_value))); } @@ -2396,7 +2400,8 @@ bool Field_new_decimal::store_value(const my_decimal *decimal_value) my_decimal2binary(E_DEC_FATAL_ERROR, &buff, ptr, precision, dec); error= 1; } - DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (byte *) ptr, bin_size);); + DBUG_EXECUTE("info", print_decimal_buff(decimal_value, (byte *) ptr, + bin_size);); DBUG_RETURN(error); } @@ -4443,19 +4448,18 @@ Field_timestamp::Field_timestamp(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_str(ptr_arg, 19, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) { /* For 4.0 MYD and 4.0 InnoDB compatibility */ flags|= ZEROFILL_FLAG | UNSIGNED_FLAG; - if (table && !table->timestamp_field && - unireg_check != NONE) + if (!share->timestamp_field && unireg_check != NONE) { /* This timestamp has auto-update */ - table->timestamp_field= this; - flags|=TIMESTAMP_FLAG; + share->timestamp_field= this; + flags|= TIMESTAMP_FLAG; } } @@ -5959,6 +5963,26 @@ int Field_str::store(double nr) } +uint Field::is_equal(create_field *new_field) +{ + return (new_field->sql_type == type()); +} + + +uint Field_str::is_equal(create_field *new_field) +{ + if (((new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + !(flags & (BINCMP_FLAG | BINARY_FLAG))) || + (!(new_field->flags & (BINCMP_FLAG | BINARY_FLAG)) && + (flags & (BINCMP_FLAG | BINARY_FLAG)))) + return 0; /* One of the fields is binary and the other one isn't */ + + return ((new_field->sql_type == type()) && + new_field->charset == field_charset && + new_field->length == max_length()); +} + + int Field_string::store(longlong nr, bool unsigned_val) { char buff[64]; @@ -6202,8 +6226,7 @@ uint Field_string::max_packed_col_length(uint max_length) Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table) { - Field *new_field; - + Field *field; if (type() != MYSQL_TYPE_VAR_STRING || table == new_table) return Field::new_field(root, new_table); @@ -6212,19 +6235,23 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table) This is done to ensure that ALTER TABLE will convert old VARCHAR fields to now VARCHAR fields. */ - if ((new_field= new Field_varstring(field_length, maybe_null(), - field_name, new_table, charset()))) + if ((field= new Field_varstring(field_length, maybe_null(), field_name, + new_table->s, charset()))) { + field->init(new_table); /* delayed_insert::get_local_table() needs a ptr copied from old table. This is what other new_field() methods do too. The above method of Field_varstring sets ptr to NULL. 
*/ - new_field->ptr= ptr; + field->ptr= ptr; + field->null_ptr= null_ptr; + field->null_bit= null_bit; } - return new_field; + return field; } + /**************************************************************************** VARCHAR type Data in field->ptr is stored as: @@ -6353,7 +6380,8 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value) } -int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) +int Field_varstring::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_len) { uint a_length, b_length; int diff; @@ -6368,6 +6396,8 @@ int Field_varstring::cmp(const char *a_ptr, const char *b_ptr) a_length= uint2korr(a_ptr); b_length= uint2korr(b_ptr); } + set_if_smaller(a_length, max_len); + set_if_smaller(b_length, max_len); diff= field_charset->coll->strnncollsp(field_charset, (const uchar*) a_ptr+ length_bytes, @@ -6742,6 +6772,22 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, } +uint Field_varstring::is_equal(create_field *new_field) +{ + if (new_field->sql_type == type() && + new_field->charset == field_charset) + { + if (new_field->length == max_length()) + return IS_EQUAL_YES; + if (new_field->length > max_length() && + ((new_field->length <= 255 && max_length() <= 255) || + (new_field->length > 255 && max_length() > 255))) + return IS_EQUAL_PACK_LENGTH; // VARCHAR, longer variable length + } + return IS_EQUAL_NO; +} + + /**************************************************************************** ** blob type ** A blob is saved as a length and a pointer. The length is stored in the @@ -6750,19 +6796,16 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, Field_blob::Field_blob(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, + TABLE_SHARE *share, uint blob_pack_length, CHARSET_INFO *cs) :Field_longstr(ptr_arg, BLOB_PACK_LENGTH_TO_MAX_LENGH(blob_pack_length), - null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, - table_arg, cs), + null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, + cs), packlength(blob_pack_length) { flags|= BLOB_FLAG; - if (table) - { - table->s->blob_fields++; - /* TODO: why do not fill table->s->blob_field array here? */ - } + share->blob_fields++; + /* TODO: why do not fill table->s->blob_field array here? */ } @@ -7016,13 +7059,16 @@ int Field_blob::cmp(const char *a,uint32 a_length, const char *b, } -int Field_blob::cmp(const char *a_ptr, const char *b_ptr) +int Field_blob::cmp_max(const char *a_ptr, const char *b_ptr, + uint max_length) { char *blob1,*blob2; memcpy_fixed(&blob1,a_ptr+packlength,sizeof(char*)); memcpy_fixed(&blob2,b_ptr+packlength,sizeof(char*)); - return Field_blob::cmp(blob1,get_length(a_ptr), - blob2,get_length(b_ptr)); + uint a_len= get_length(a_ptr), b_len= get_length(b_ptr); + set_if_smaller(a_len, max_length); + set_if_smaller(b_len, max_length); + return Field_blob::cmp(blob1,a_len,blob2,b_len); } @@ -7885,6 +7931,17 @@ bool Field_num::eq_def(Field *field) } +uint Field_num::is_equal(create_field *new_field) +{ + return ((new_field->sql_type == type()) && + ((new_field->flags & UNSIGNED_FLAG) == (uint) (flags & + UNSIGNED_FLAG)) && + ((new_field->flags & AUTO_INCREMENT_FLAG) == + (uint) (flags & AUTO_INCREMENT_FLAG)) && + (new_field->length >= max_length())); +} + + /* Bit field. 
@@ -7916,10 +7973,9 @@ bool Field_num::eq_def(Field *field) Field_bit::Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + enum utype unireg_check_arg, const char *field_name_arg) : Field(ptr_arg, len_arg >> 3, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg), + unireg_check_arg, field_name_arg), bit_ptr(bit_ptr_arg), bit_ofs(bit_ofs_arg), bit_len(len_arg & 7) { /* @@ -8072,6 +8128,35 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) } +/* + Compare two bit fields using pointers within the record. + SYNOPSIS + cmp_max() + a Pointer to field->ptr in first record + b Pointer to field->ptr in second record + max_len Maximum length used in index + DESCRIPTION + This method is used from key_rec_cmp used by merge sorts used + by partitioned index read and later other similar places. + The a and b pointer must be pointers to the field in a record + (not the table->record[0] necessarily) +*/ +int Field_bit::cmp_max(const char *a, const char *b, uint max_len) +{ + my_ptrdiff_t a_diff= a - ptr; + my_ptrdiff_t b_diff= b - ptr; + if (bit_len) + { + int flag; + uchar bits_a= get_rec_bits(bit_ptr+a_diff, bit_ofs, bit_len); + uchar bits_b= get_rec_bits(bit_ptr+b_diff, bit_ofs, bit_len); + if ((flag= (int) (bits_a - bits_b))) + return flag; + } + return memcmp(a, b, field_length); +} + + int Field_bit::key_cmp(const byte *str, uint length) { if (bit_len) @@ -8156,11 +8241,10 @@ const char *Field_bit::unpack(char *to, const char *from) Field_bit_as_char::Field_bit_as_char(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg) - : Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, 0, - 0, unireg_check_arg, field_name_arg, table_arg), - create_length(len_arg) + const char *field_name_arg) + :Field_bit(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, 0, 0, + unireg_check_arg, field_name_arg), + create_length(len_arg) { bit_len= 0; field_length= ((len_arg + 7) & ~7) / 8; @@ -8696,7 +8780,7 @@ uint pack_length_to_packflag(uint type) } -Field *make_field(char *ptr, uint32 field_length, +Field *make_field(TABLE_SHARE *share, char *ptr, uint32 field_length, uchar *null_pos, uchar null_bit, uint pack_flag, enum_field_types field_type, @@ -8704,8 +8788,7 @@ Field *make_field(char *ptr, uint32 field_length, Field::geometry_type geom_type, Field::utype unireg_check, TYPELIB *interval, - const char *field_name, - struct st_table *table) + const char *field_name) { uchar *bit_ptr; uchar bit_offset; @@ -8751,13 +8834,14 @@ Field *make_field(char *ptr, uint32 field_length, field_type == FIELD_TYPE_DECIMAL || // 3.23 or 4.0 string field_type == MYSQL_TYPE_VAR_STRING) return new Field_string(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, field_charset); if (field_type == MYSQL_TYPE_VARCHAR) return new Field_varstring(ptr,field_length, HA_VARCHAR_PACKLENGTH(field_length), null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, + share, field_charset); return 0; // Error } @@ -8769,22 +8853,22 @@ Field *make_field(char *ptr, uint32 field_length, #ifdef HAVE_SPATIAL if (f_is_geom(pack_flag)) return new Field_geom(ptr,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, pack_length, geom_type); #endif if 
(f_is_blob(pack_flag)) return new Field_blob(ptr,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, pack_length, field_charset); if (interval) { if (f_is_enum(pack_flag)) return new Field_enum(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, pack_length, interval, field_charset); else return new Field_set(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, pack_length, interval, field_charset); } } @@ -8792,80 +8876,82 @@ Field *make_field(char *ptr, uint32 field_length, switch (field_type) { case FIELD_TYPE_DECIMAL: return new Field_decimal(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_NEWDECIMAL: return new Field_new_decimal(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_FLOAT: return new Field_float(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag)== 0); case FIELD_TYPE_DOUBLE: return new Field_double(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_decimals(pack_flag), f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag)== 0); case FIELD_TYPE_TINY: return new Field_tiny(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_SHORT: return new Field_short(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_INT24: return new Field_medium(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_LONG: return new Field_long(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_LONGLONG: return new Field_longlong(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table, + unireg_check, field_name, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case FIELD_TYPE_TIMESTAMP: return new Field_timestamp(ptr,field_length, null_pos, null_bit, - unireg_check, field_name, table, + unireg_check, field_name, share, field_charset); case FIELD_TYPE_YEAR: return new Field_year(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, table); + unireg_check, field_name); case FIELD_TYPE_DATE: return new Field_date(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_NEWDATE: return new Field_newdate(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_TIME: return new Field_time(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); case FIELD_TYPE_DATETIME: return new Field_datetime(ptr,null_pos,null_bit, - unireg_check, field_name, table, field_charset); + unireg_check, field_name, field_charset); 
case FIELD_TYPE_NULL: - return new Field_null(ptr,field_length,unireg_check,field_name,table, field_charset); + return new Field_null(ptr, field_length, unireg_check, field_name, + field_charset); case FIELD_TYPE_BIT: return f_bit_as_char(pack_flag) ? new Field_bit_as_char(ptr, field_length, null_pos, null_bit, - unireg_check, field_name, table) : + unireg_check, field_name) : new Field_bit(ptr, field_length, null_pos, null_bit, bit_ptr, - bit_offset, unireg_check, field_name, table); + bit_offset, unireg_check, field_name); + default: // Impossible (Wrong version) break; } @@ -8948,14 +9034,15 @@ create_field::create_field(Field *old_field,Field *orig_field) char buff[MAX_FIELD_WIDTH],*pos; String tmp(buff,sizeof(buff), charset), *res; my_ptrdiff_t diff; + bool is_null; /* Get the value from default_values */ diff= (my_ptrdiff_t) (orig_field->table->s->default_values- orig_field->table->record[0]); - orig_field->move_field(diff); // Points now at default_values - bool is_null=orig_field->is_real_null(); + orig_field->move_field_offset(diff); // Points now at default_values + is_null= orig_field->is_real_null(); res= orig_field->val_str(&tmp); - orig_field->move_field(-diff); // Back to record[0] + orig_field->move_field_offset(-diff); // Back to record[0] if (!is_null) { pos= (char*) sql_strmake(res->ptr(), res->length()); diff --git a/sql/field.h b/sql/field.h index e8dd7f05f99..3fbdc0d0aa9 100644 --- a/sql/field.h +++ b/sql/field.h @@ -29,6 +29,7 @@ class Send_field; class Protocol; +class create_field; struct st_cache_field; void field_conv(Field *to,Field *from); @@ -62,6 +63,7 @@ public: const char **table_name, *field_name; LEX_STRING comment; query_id_t query_id; // For quick test of used fields + bool add_index; // For check if field will be indexed /* Field is part of the following keys */ key_map key_start,part_of_key,part_of_sortkey; /* @@ -87,12 +89,15 @@ public: utype unireg_check; uint32 field_length; // Length of field uint field_index; // field number in fields array - uint16 flags; + uint32 flags; + /* fieldnr is the id of the field (first field = 1) as is also + used in key_part. 
+ */ + uint16 fieldnr; uchar null_bit; // Bit used to test null bit Field(char *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,uchar null_bit_arg, - utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + utype unireg_check_arg, const char *field_name_arg); virtual ~Field() {} /* Store functions returns 1 on overflow and -1 on fatal error */ virtual int store(const char *to,uint length,CHARSET_INFO *cs)=0; @@ -163,6 +168,8 @@ public: virtual enum_field_types type() const =0; virtual enum_field_types real_type() const { return type(); } inline int cmp(const char *str) { return cmp(ptr,str); } + virtual int cmp_max(const char *a, const char *b, uint max_len) + { return cmp(a, b); } virtual int cmp(const char *,const char *)=0; virtual int cmp_binary(const char *a,const char *b, uint32 max_length=~0L) { return memcmp(a,b,pack_length()); } @@ -193,6 +200,12 @@ public: return test(record[(uint) (null_ptr - (uchar*) table->record[0])] & null_bit); } + inline bool is_null_in_record_with_offset(my_ptrdiff_t offset) + { + if (!null_ptr) + return 0; + return test(null_ptr[offset] & null_bit); + } inline void set_null(int row_offset=0) { if (null_ptr) null_ptr[row_offset]|= null_bit; } inline void set_notnull(int row_offset=0) @@ -215,12 +228,13 @@ public: virtual Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, char *new_ptr, uchar *new_null_ptr, uint new_null_bit); + Field *clone(MEM_ROOT *mem_root, struct st_table *new_table); inline void move_field(char *ptr_arg,uchar *null_ptr_arg,uchar null_bit_arg) { ptr=ptr_arg; null_ptr=null_ptr_arg; null_bit=null_bit_arg; } inline void move_field(char *ptr_arg) { ptr=ptr_arg; } - inline void move_field(my_ptrdiff_t ptr_diff) + virtual void move_field_offset(my_ptrdiff_t ptr_diff) { ptr=ADD_TO_PTR(ptr,ptr_diff,char*); if (null_ptr) @@ -241,7 +255,15 @@ public: ptr-=row_offset; return tmp; } - + inline longlong val_int(char *new_ptr) + { + char *old_ptr= ptr; + longlong return_value; + ptr= new_ptr; + return_value= val_int(); + ptr= old_ptr; + return return_value; + } inline String *val_str(String *str, char *new_ptr) { char *old_ptr= ptr; @@ -315,8 +337,16 @@ public: return (op_result == E_DEC_OVERFLOW); } int warn_if_overflow(int op_result); + void init(TABLE *table_arg) + { + orig_table= table= table_arg; + table_name= &table_arg->alias; + } + /* maximum possible display length */ virtual uint32 max_length()= 0; + + virtual uint is_equal(create_field *new_field); /* convert decimal to longlong with overflow check */ longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag, int *err); @@ -345,7 +375,6 @@ public: Field_num(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg); Item_result result_type () const { return REAL_RESULT; } void prepend_zeros(String *value); @@ -357,6 +386,7 @@ public: bool eq_def(Field *field); int store_decimal(const my_decimal *); my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -366,8 +396,7 @@ protected: public: Field_str(char *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *charset); + const char *field_name_arg, CHARSET_INFO *charset); Item_result result_type () const { return STRING_RESULT; } uint decimals() const { return NOT_FIXED_DEC; } int store(double nr); @@ 
-381,6 +410,7 @@ public: uint32 max_length() { return field_length; } friend class create_field; my_decimal *val_decimal(my_decimal *); + uint is_equal(create_field *new_field); }; @@ -391,10 +421,9 @@ class Field_longstr :public Field_str public: Field_longstr(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, - const char *field_name_arg, - struct st_table *table_arg,CHARSET_INFO *charset) + const char *field_name_arg, CHARSET_INFO *charset) :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, charset) + field_name_arg, charset) {} int store_decimal(const my_decimal *d); @@ -403,17 +432,13 @@ public: /* base class for float and double and decimal (old one) */ class Field_real :public Field_num { public: - Field_real(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, dec_arg, zero_arg, unsigned_arg) + field_name_arg, dec_arg, zero_arg, unsigned_arg) {} - - int store_decimal(const my_decimal *); my_decimal *val_decimal(my_decimal *); }; @@ -424,10 +449,9 @@ public: Field_decimal(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} enum_field_types type() const { return FIELD_TYPE_DECIMAL;} @@ -464,11 +488,9 @@ public: Field_new_decimal(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, bool zero_arg, bool unsigned_arg); Field_new_decimal(uint32 len_arg, bool maybe_null_arg, - const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg); enum_field_types type() const { return FIELD_TYPE_NEWDECIMAL;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; } @@ -499,10 +521,9 @@ public: Field_tiny(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } @@ -530,16 +551,15 @@ public: Field_short(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_short(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg,bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg, 0, 0, unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_SHORT;} @@ -566,10 +586,9 @@ public: Field_medium(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } @@ -597,16 +616,15 @@ public: Field_long(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_long(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg,bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg,0,0,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_LONG;} @@ -634,17 +652,16 @@ public: Field_longlong(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, bool zero_arg, bool unsigned_arg) :Field_num(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, 0, zero_arg,unsigned_arg) {} Field_longlong(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, bool unsigned_arg) + bool unsigned_arg) :Field_num((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg,0,0,unsigned_arg) + NONE, field_name_arg,0,0,unsigned_arg) {} enum Item_result result_type () const { return INT_RESULT; } enum_field_types type() const { return FIELD_TYPE_LONGLONG;} @@ -673,16 +690,15 @@ public: Field_float(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} Field_float(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg) + uint8 dec_arg) :Field_real((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, (uint) 0, - NONE, field_name_arg, table_arg, dec_arg, 0, 0) + NONE, field_name_arg, dec_arg, 0, 0) {} enum_field_types type() const { return FIELD_TYPE_FLOAT;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_FLOAT; } @@ -707,16 +723,15 @@ public: Field_double(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg,bool zero_arg,bool unsigned_arg) :Field_real(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, + unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} Field_double(uint32 len_arg, bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, uint8 dec_arg) + uint8 dec_arg) :Field_real((char*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, (uint) 0, - NONE, field_name_arg, table_arg, dec_arg, 0, 0) + NONE, field_name_arg, dec_arg, 0, 0) {} enum_field_types type() const { return FIELD_TYPE_DOUBLE;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_DOUBLE; } @@ -743,9 +758,9 @@ class Field_null :public Field_str { public: Field_null(char *ptr_arg, uint32 len_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, len_arg, null, 1, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_NULL;} int store(const char *to, uint length, CHARSET_INFO *cs) @@ -773,8 +788,7 @@ public: Field_timestamp(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, - CHARSET_INFO *cs); + TABLE_SHARE *share, CHARSET_INFO *cs); enum_field_types type() const { return FIELD_TYPE_TIMESTAMP;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -824,10 +838,9 @@ class Field_year :public Field_tiny { public: Field_year(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg) + enum utype unireg_check_arg, const char *field_name_arg) :Field_tiny(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, 1, 1) + unireg_check_arg, field_name_arg, 1, 1) {} enum_field_types type() const { return FIELD_TYPE_YEAR;} int store(const char *to,uint length,CHARSET_INFO *charset); @@ -846,14 +859,14 @@ class Field_date :public Field_str { public: Field_date(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_date(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,10, maybe_null_arg ? 
(uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATE;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONG_INT; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -873,13 +886,14 @@ public: bool zero_pack() const { return 1; } }; + class Field_newdate :public Field_str { public: Field_newdate(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 10, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATE;} enum_field_types real_type() const { return FIELD_TYPE_NEWDATE; } @@ -909,14 +923,14 @@ class Field_time :public Field_str { public: Field_time(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 8, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_time(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,8, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_TIME;} enum ha_base_keytype key_type() const { return HA_KEYTYPE_INT24; } enum Item_result cmp_type () const { return INT_RESULT; } @@ -944,14 +958,14 @@ class Field_datetime :public Field_str { public: Field_datetime(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str(ptr_arg, 19, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) + unireg_check_arg, field_name_arg, cs) {} Field_datetime(bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_str((char*) 0,19, maybe_null_arg ? (uchar*) "": 0,0, - NONE, field_name_arg, table_arg, cs) {} + NONE, field_name_arg, cs) {} enum_field_types type() const { return FIELD_TYPE_DATETIME;} #ifdef HAVE_LONG_LONG enum ha_base_keytype key_type() const { return HA_KEYTYPE_ULONGLONG; } @@ -983,13 +997,13 @@ public: Field_string(char *ptr_arg, uint32 len_arg,uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs) {}; + unireg_check_arg, field_name_arg, cs) {}; Field_string(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + CHARSET_INFO *cs) :Field_longstr((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs) {}; + NONE, field_name_arg, cs) {}; enum_field_types type() const { @@ -1034,26 +1048,23 @@ public: uint32 length_bytes; Field_varstring(char *ptr_arg, uint32 len_arg, uint length_bytes_arg, - uchar *null_ptr_arg, - uchar null_bit_arg, + uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_longstr(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, cs), + unireg_check_arg, field_name_arg, cs), length_bytes(length_bytes_arg) { - if (table) - table->s->varchar_fields++; + share->varchar_fields++; } Field_varstring(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) + TABLE_SHARE *share, CHARSET_INFO *cs) :Field_longstr((char*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs), + NONE, field_name_arg, cs), length_bytes(len_arg < 256 ? 1 :2) { - if (table) - table->s->varchar_fields++; + share->varchar_fields++; } enum_field_types type() const { return MYSQL_TYPE_VARCHAR; } @@ -1074,7 +1085,11 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { + return cmp_max(a, b, ~0L); + } void sort_string(char *buff,uint length); void get_key_image(char *buff,uint length, imagetype type); void set_key_image(char *buff,uint length); @@ -1100,6 +1115,7 @@ public: Field *new_key_field(MEM_ROOT *root, struct st_table *new_table, char *new_ptr, uchar *new_null_ptr, uint new_null_bit); + uint is_equal(create_field *new_field); }; @@ -1110,12 +1126,11 @@ protected: public: Field_blob(char *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, - CHARSET_INFO *cs); + TABLE_SHARE *share, uint blob_pack_length, CHARSET_INFO *cs); Field_blob(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, CHARSET_INFO *cs) - :Field_longstr((char*) 0,len_arg, maybe_null_arg ? (uchar*) "": 0, 0, - NONE, field_name_arg, table_arg, cs), + CHARSET_INFO *cs) + :Field_longstr((char*) 0, len_arg, maybe_null_arg ? 
(uchar*) "": 0, 0, + NONE, field_name_arg, cs), packlength(4) { flags|= BLOB_FLAG; @@ -1130,7 +1145,9 @@ public: longlong val_int(void); String *val_str(String*,String *); my_decimal *val_decimal(my_decimal *); - int cmp(const char *,const char*); + int cmp_max(const char *, const char *, uint max_length); + int cmp(const char *a,const char*b) + { return cmp_max(a, b, ~0L); } int cmp(const char *a, uint32 a_length, const char *b, uint32 b_length); int cmp_binary(const char *a,const char *b, uint32 max_length=~0L); int key_cmp(const byte *,const byte*); @@ -1155,6 +1172,10 @@ public: { memcpy_fixed(str,ptr+packlength,sizeof(char*)); } + inline void get_ptr(char **str, uint row_offset) + { + memcpy_fixed(str,ptr+packlength+row_offset,sizeof(char*)); + } inline void set_ptr(char *length,char *data) { memcpy(ptr,length,packlength); @@ -1206,15 +1227,14 @@ public: Field_geom(char *ptr_arg, uchar *null_ptr_arg, uint null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint blob_pack_length, + TABLE_SHARE *share, uint blob_pack_length, enum geometry_type geom_type_arg) :Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, - field_name_arg, table_arg, blob_pack_length,&my_charset_bin) + field_name_arg, share, blob_pack_length, &my_charset_bin) { geom_type= geom_type_arg; } Field_geom(uint32 len_arg,bool maybe_null_arg, const char *field_name_arg, - struct st_table *table_arg, enum geometry_type geom_type_arg) - :Field_blob(len_arg, maybe_null_arg, field_name_arg, - table_arg, &my_charset_bin) + TABLE_SHARE *share, enum geometry_type geom_type_arg) + :Field_blob(len_arg, maybe_null_arg, field_name_arg, &my_charset_bin) { geom_type= geom_type_arg; } enum ha_base_keytype key_type() const { return HA_KEYTYPE_VARBINARY2; } enum_field_types type() const { return FIELD_TYPE_GEOMETRY; } @@ -1235,13 +1255,13 @@ protected: public: TYPELIB *typelib; Field_enum(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, - uchar null_bit_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint packlength_arg, - TYPELIB *typelib_arg, - CHARSET_INFO *charset_arg) + uchar null_bit_arg, + enum utype unireg_check_arg, const char *field_name_arg, + uint packlength_arg, + TYPELIB *typelib_arg, + CHARSET_INFO *charset_arg) :Field_str(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, - unireg_check_arg, field_name_arg, table_arg, charset_arg), + unireg_check_arg, field_name_arg, charset_arg), packlength(packlength_arg),typelib(typelib_arg) { flags|=ENUM_FLAG; @@ -1278,12 +1298,12 @@ public: Field_set(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg,uint32 packlength_arg, + uint32 packlength_arg, TYPELIB *typelib_arg, CHARSET_INFO *charset_arg) :Field_enum(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, field_name_arg, - table_arg, packlength_arg, - typelib_arg,charset_arg) + packlength_arg, + typelib_arg,charset_arg) { flags=(flags & ~ENUM_FLAG) | SET_FLAG; } @@ -1298,6 +1318,20 @@ public: }; +/* + Note: + To use Field_bit::cmp_binary() you need to copy the bits stored in + the beginning of the record (the NULL bytes) to each memory you + want to compare (where the arguments point). + + This is the reason: + - Field_bit::cmp_binary() is only implemented in the base class + (Field::cmp_binary()). + - Field::cmp_binary() currenly use pack_length() to calculate how + long the data is. 
+ - pack_length() includes size of the bits stored in the NULL bytes + of the record. +*/ class Field_bit :public Field { public: uchar *bit_ptr; // position in record where 'uneven' bits store @@ -1305,8 +1339,7 @@ public: uint bit_len; // number of 'uneven' high bits Field_bit(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, uchar *bit_ptr_arg, uchar bit_ofs_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + enum utype unireg_check_arg, const char *field_name_arg); enum_field_types type() const { return FIELD_TYPE_BIT; } enum ha_base_keytype key_type() const { return HA_KEYTYPE_BIT; } uint32 key_length() const { return (uint32) field_length + (bit_len > 0); } @@ -1324,12 +1357,13 @@ public: my_decimal *val_decimal(my_decimal *); int cmp(const char *a, const char *b) { return cmp_binary(a, b); } + int cmp_binary_offset(uint row_offset) + { return cmp_offset(row_offset); } + int cmp_max(const char *a, const char *b, uint max_length); int key_cmp(const byte *a, const byte *b) { return cmp_binary((char *) a, (char *) b); } int key_cmp(const byte *str, uint length); int cmp_offset(uint row_offset); - int cmp_binary_offset(uint row_offset) - { return cmp_offset(row_offset); } void get_key_image(char *buff, uint length, imagetype type); void set_key_image(char *buff, uint length) { Field_bit::store(buff, length, &my_charset_bin); } @@ -1349,6 +1383,11 @@ public: bit_ptr= bit_ptr_arg; bit_ofs= bit_ofs_arg; } + void move_field_offset(my_ptrdiff_t ptr_diff) + { + Field::move_field_offset(ptr_diff); + bit_ptr= ADD_TO_PTR(bit_ptr, ptr_diff, uchar*); + } }; @@ -1357,8 +1396,7 @@ public: uchar create_length; Field_bit_as_char(char *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, - enum utype unireg_check_arg, const char *field_name_arg, - struct st_table *table_arg); + enum utype unireg_check_arg, const char *field_name_arg); enum ha_base_keytype key_type() const { return HA_KEYTYPE_BINARY; } uint32 max_length() { return (uint32) create_length; } uint size_of() const { return sizeof(*this); } @@ -1460,14 +1498,13 @@ public: }; -Field *make_field(char *ptr, uint32 field_length, +Field *make_field(TABLE_SHARE *share, char *ptr, uint32 field_length, uchar *null_pos, uchar null_bit, uint pack_flag, enum_field_types field_type, CHARSET_INFO *cs, Field::geometry_type geom_type, Field::utype unireg_check, - TYPELIB *interval, const char *field_name, - struct st_table *table); + TYPELIB *interval, const char *field_name); uint pack_length_to_packflag(uint type); enum_field_types get_blob_type_from_length(ulong length); uint32 calc_pack_length(enum_field_types type,uint32 length); diff --git a/sql/field_conv.cc b/sql/field_conv.cc index 895f022624c..0c385efd10b 100644 --- a/sql/field_conv.cc +++ b/sql/field_conv.cc @@ -178,8 +178,7 @@ set_field_to_null_with_conversions(Field *field, bool no_conversions) } if (current_thd->count_cuted_fields == CHECK_FIELD_WARN) { - field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_NULL_TO_NOTNULL, 1); + field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_BAD_NULL_ERROR, 1); return 0; } if (!current_thd->no_errors) diff --git a/sql/ha_archive.cc b/sql/ha_archive.cc index cad81926a72..a8bc0822a85 100644 --- a/sql/ha_archive.cc +++ b/sql/ha_archive.cc @@ -20,7 +20,6 @@ #include "mysql_priv.h" -#if defined(HAVE_ARCHIVE_DB) #include "ha_archive.h" #include <my_dir.h> @@ -31,13 +30,13 @@ a storage engine without indexes that could compress data very well. 
So, welcome to a completely compressed storage engine. This storage engine only does inserts. No replaces, deletes, or updates. All reads are - complete table scans. Compression is done through gzip (bzip compresses + complete table scans. Compression is done through azip (bzip compresses better, but only marginally; if someone asks I could add support for - it too, but be aware that it costs a lot more in CPU time than gzip). + it too, but be aware that it costs a lot more in CPU time than azip). We keep a file pointer open for each instance of ha_archive for each read but for writes we keep one open file handle just for that. We flush it - only if we have a read occur. gzip handles compressing lots of records + only if we have a read occur. azip handles compressing lots of records at once much better than doing lots of little records between writes. It is possible to not lock on writes but this would then mean we couldn't handle bulk inserts as well (that is if someone was trying to read at @@ -85,7 +84,7 @@ Add truncate table command. Implement versioning, should be easy. Allow for errors, find a way to mark bad rows. - Talk to the gzip guys, come up with a writable format so that updates are doable + Talk to the azip guys, come up with a writable format so that updates are doable without switching to a block method. Add optional feature so that rows can be flushed at interval (which will cause less compression but may speed up ordered searches). @@ -105,6 +104,7 @@ rows - This is an unsigned long long which is the number of rows in the data file. check point - Reserved for future use + auto increment - MAX value for autoincrement dirty - Status of the file, whether or not its values are the latest. This flag is what causes a repair to occur @@ -126,24 +126,28 @@ static HASH archive_open_tables; #define ARN ".ARN" // Files used during an optimize call #define ARM ".ARM" // Meta file /* - uchar + uchar + ulonglong + ulonglong + uchar + uchar + uchar + ulonglong + ulonglong + ulonglong + ulonglong + uchar */ -#define META_BUFFER_SIZE 19 // Size of the data used in the meta file +#define META_BUFFER_SIZE sizeof(uchar) + sizeof(uchar) + sizeof(ulonglong) \ + + sizeof(ulonglong) + sizeof(ulonglong) + sizeof(ulonglong) + sizeof(uchar) + /* uchar + uchar */ #define DATA_BUFFER_SIZE 2 // Size of the data used in the data file #define ARCHIVE_CHECK_HEADER 254 // The number we use to determine corruption -/* +/* Static declarations for handlerton */ +static handler *archive_create_handler(TABLE_SHARE *table); +/* Number of rows that will force a bulk insert.
*/ #define ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT 2 - /* dummy handlerton - only to have something to return from archive_db_init */ handlerton archive_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "ARCHIVE", SHOW_OPTION_YES, "Archive storage engine", @@ -164,9 +168,25 @@ handlerton archive_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_NO_FLAGS + archive_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + archive_db_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter interface */ + NULL, /* fill_files_table */ + HTON_NO_FLAGS, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; +static handler *archive_create_handler(TABLE_SHARE *table) +{ + return new ha_archive(table); +} /* Used for hash table that tracks open tables. @@ -222,7 +242,7 @@ error: FALSE OK */ -bool archive_db_end() +int archive_db_end(ha_panic_function type) { if (archive_inited) { @@ -230,32 +250,31 @@ bool archive_db_end() VOID(pthread_mutex_destroy(&archive_mutex)); } archive_inited= 0; - return FALSE; + return 0; } -ha_archive::ha_archive(TABLE *table_arg) +ha_archive::ha_archive(TABLE_SHARE *table_arg) :handler(&archive_hton, table_arg), delayed_insert(0), bulk_insert(0) { /* Set our original buffer from pre-allocated memory */ buffer.set((char *)byte_buffer, IO_SIZE, system_charset_info); /* The size of the offset value we will use for position() */ - ref_length = 2 << ((zlibCompileFlags() >> 6) & 3); - DBUG_ASSERT(ref_length <= sizeof(z_off_t)); + ref_length = sizeof(my_off_t); } /* This method reads the header of a datafile and returns whether or not it was successful. */ -int ha_archive::read_data_header(gzFile file_to_read) +int ha_archive::read_data_header(azio_stream *file_to_read) { uchar data_buffer[DATA_BUFFER_SIZE]; DBUG_ENTER("ha_archive::read_data_header"); - if (gzrewind(file_to_read) == -1) + if (azrewind(file_to_read) == -1) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - if (gzread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) + if (azread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) DBUG_RETURN(errno ? errno : -1); DBUG_PRINT("ha_archive::read_data_header", ("Check %u", data_buffer[0])); @@ -271,7 +290,7 @@ int ha_archive::read_data_header(gzFile file_to_read) /* This method writes out the header of a datafile and returns whether or not it was successful. */ -int ha_archive::write_data_header(gzFile file_to_write) +int ha_archive::write_data_header(azio_stream *file_to_write) { uchar data_buffer[DATA_BUFFER_SIZE]; DBUG_ENTER("ha_archive::write_data_header"); @@ -279,7 +298,7 @@ int ha_archive::write_data_header(gzFile file_to_write) data_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER; data_buffer[1]= (uchar)ARCHIVE_VERSION; - if (gzwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) != + if (azwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE) goto error; DBUG_PRINT("ha_archive::write_data_header", ("Check %u", (uint)data_buffer[0])); @@ -294,9 +313,12 @@ error: This method reads the header of a meta file and returns whether or not it was successful. *rows will contain the current number of rows in the data file upon success. 
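For reference, the byte layout this describes (a sketch read off write_meta_file() below; integers are stored with int8store()/uint8korr()): offset 0: uchar ARCHIVE_CHECK_HEADER; offset 1: uchar ARCHIVE_VERSION; offset 2: ulonglong rows; offset 10: ulonglong check point (reserved); offset 18: ulonglong auto_increment; offset 26: ulonglong forced_flushes; offset 34: uchar dirty flag.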
*/ -int ha_archive::read_meta_file(File meta_file, ha_rows *rows) +int ha_archive::read_meta_file(File meta_file, ha_rows *rows, + ulonglong *auto_increment, + ulonglong *forced_flushes) { uchar meta_buffer[META_BUFFER_SIZE]; + uchar *ptr= meta_buffer; ulonglong check_point; DBUG_ENTER("ha_archive::read_meta_file"); @@ -308,17 +330,27 @@ int ha_archive::read_meta_file(File meta_file, ha_rows *rows) /* Parse out the meta data; we ignore the version at the moment */ - *rows= (ha_rows)uint8korr(meta_buffer + 2); - check_point= uint8korr(meta_buffer + 10); + + ptr+= sizeof(uchar)*2; // Move past header + *rows= (ha_rows)uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past rows + check_point= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past check_point + *auto_increment= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past auto_increment + *forced_flushes= uint8korr(ptr); + ptr+= sizeof(ulonglong); // Move past forced_flush DBUG_PRINT("ha_archive::read_meta_file", ("Check %u", data_buffer[0])); DBUG_PRINT("ha_archive::read_meta_file", ("Check %d", (uint)meta_buffer[0])); DBUG_PRINT("ha_archive::read_meta_file", ("Version %d", (uint)meta_buffer[1])); - DBUG_PRINT("ha_archive::read_meta_file", ("Rows %lld", *rows)); - DBUG_PRINT("ha_archive::read_meta_file", ("Checkpoint %lld", check_point)); - DBUG_PRINT("ha_archive::read_meta_file", ("Dirty %d", (int)meta_buffer[18])); + DBUG_PRINT("ha_archive::read_meta_file", ("Rows %llu", *rows)); + DBUG_PRINT("ha_archive::read_meta_file", ("Checkpoint %llu", check_point)); + DBUG_PRINT("ha_archive::read_meta_file", ("Auto-Increment %llu", *auto_increment)); + DBUG_PRINT("ha_archive::read_meta_file", ("Forced Flushes %llu", *forced_flushes)); + DBUG_PRINT("ha_archive::read_meta_file", ("Dirty %d", (int)(*ptr))); if ((meta_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) || - ((bool)meta_buffer[18] == TRUE)) + ((bool)(*ptr)== TRUE)) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); my_sync(meta_file, MYF(MY_WME)); @@ -331,22 +363,40 @@ int ha_archive::read_meta_file(File meta_file, ha_rows *rows) By setting dirty you say whether or not the file represents the actual state of the data file. Upon ::open() we set it to dirty, and upon ::close() we set it to clean.
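In outline, the protocol is (a recap of get_share() and free_share() below, not additional behaviour): on open, read_meta_file() returns HA_ERR_CRASHED_ON_USAGE if the dirty flag is set, otherwise write_meta_file(..., TRUE) marks the file as in use; on close, write_meta_file(..., share->crashed ? TRUE : FALSE) writes it back clean unless the table is crashed. An unclean shutdown therefore leaves dirty set and the next open treats the table as crashed until it has been repaired.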
*/ -int ha_archive::write_meta_file(File meta_file, ha_rows rows, bool dirty) +int ha_archive::write_meta_file(File meta_file, ha_rows rows, + ulonglong auto_increment, + ulonglong forced_flushes, + bool dirty) { uchar meta_buffer[META_BUFFER_SIZE]; + uchar *ptr= meta_buffer; ulonglong check_point= 0; //Reserved for the future DBUG_ENTER("ha_archive::write_meta_file"); - meta_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER; - meta_buffer[1]= (uchar)ARCHIVE_VERSION; - int8store(meta_buffer + 2, (ulonglong)rows); - int8store(meta_buffer + 10, check_point); - *(meta_buffer + 18)= (uchar)dirty; - DBUG_PRINT("ha_archive::write_meta_file", ("Check %d", (uint)ARCHIVE_CHECK_HEADER)); - DBUG_PRINT("ha_archive::write_meta_file", ("Version %d", (uint)ARCHIVE_VERSION)); + *ptr= (uchar)ARCHIVE_CHECK_HEADER; + ptr += sizeof(uchar); + *ptr= (uchar)ARCHIVE_VERSION; + ptr += sizeof(uchar); + int8store(ptr, (ulonglong)rows); + ptr += sizeof(ulonglong); + int8store(ptr, check_point); + ptr += sizeof(ulonglong); + int8store(ptr, auto_increment); + ptr += sizeof(ulonglong); + int8store(ptr, forced_flushes); + ptr += sizeof(ulonglong); + *ptr= (uchar)dirty; + DBUG_PRINT("ha_archive::write_meta_file", ("Check %d", + (uint)ARCHIVE_CHECK_HEADER)); + DBUG_PRINT("ha_archive::write_meta_file", ("Version %d", + (uint)ARCHIVE_VERSION)); DBUG_PRINT("ha_archive::write_meta_file", ("Rows %llu", (ulonglong)rows)); DBUG_PRINT("ha_archive::write_meta_file", ("Checkpoint %llu", check_point)); + DBUG_PRINT("ha_archive::write_meta_file", ("Auto Increment %llu", + auto_increment)); + DBUG_PRINT("ha_archive::write_meta_file", ("Forced Flushes %llu", + forced_flushes)); DBUG_PRINT("ha_archive::write_meta_file", ("Dirty %d", (uint)dirty)); VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0))); @@ -411,18 +461,26 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, opposite. If the meta file will not open we assume it is crashed and leave it up to the user to fix. */ - if (read_meta_file(share->meta_file, &share->rows_recorded)) + if (read_meta_file(share->meta_file, &share->rows_recorded, + &share->auto_increment_value, + &share->forced_flushes)) share->crashed= TRUE; else - (void)write_meta_file(share->meta_file, share->rows_recorded, TRUE); - + (void)write_meta_file(share->meta_file, share->rows_recorded, + share->auto_increment_value, + share->forced_flushes, + TRUE); /* It is expensive to open and close the data files and since you can't have a gzip file that can be both read and written, we keep a writer open that is shared among all open tables. */ - if ((share->archive_write= gzopen(share->data_file_name, "ab")) == NULL) + if (!(azopen(&(share->archive_write), share->data_file_name, + O_WRONLY|O_APPEND|O_BINARY))) + { + DBUG_PRINT("info", ("Could not open archive write file")); share->crashed= TRUE; + } VOID(my_hash_insert(&archive_open_tables, (byte*) share)); thr_lock_init(&share->lock); } @@ -456,11 +514,19 @@ int ha_archive::free_share(ARCHIVE_SHARE *share) hash_delete(&archive_open_tables, (byte*) share); thr_lock_delete(&share->lock); VOID(pthread_mutex_destroy(&share->mutex)); - if (share->crashed) - (void)write_meta_file(share->meta_file, share->rows_recorded, TRUE); - else - (void)write_meta_file(share->meta_file, share->rows_recorded, FALSE); - if (gzclose(share->archive_write) == Z_ERRNO) + /* + We need to make sure we don't reset the crashed state. + If we open a crashed file, we need to close it as crashed unless + it has been repaired.
+ Since we will close the data down after this, we go on and count + the flush on close; + */ + share->forced_flushes++; + (void)write_meta_file(share->meta_file, share->rows_recorded, + share->auto_increment_value, + share->forced_flushes, + share->crashed ? TRUE :FALSE); + if (azclose(&(share->archive_write))) rc= 1; if (my_close(share->meta_file, MYF(0))) rc= 1; @@ -514,7 +580,7 @@ int ha_archive::open(const char *name, int mode, uint open_options) thr_lock_data_init(&share->lock,&lock,NULL); - if ((archive= gzopen(share->data_file_name, "rb")) == NULL) + if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY))) { if (errno == EROFS || errno == EACCES) DBUG_RETURN(my_errno= errno); @@ -555,7 +621,7 @@ int ha_archive::close(void) DBUG_ENTER("ha_archive::close"); /* First close stream */ - if (gzclose(archive) == Z_ERRNO) + if (azclose(&archive)) rc= 1; /* then also close share */ rc|= free_share(share); @@ -581,6 +647,10 @@ int ha_archive::create(const char *name, TABLE *table_arg, int error; DBUG_ENTER("ha_archive::create"); + auto_increment_value= (create_info->auto_increment_value ? + create_info->auto_increment_value -1 : + (ulonglong) 0); + if ((create_file= my_create(fn_format(name_buff,name,"",ARM, MY_REPLACE_EXT|MY_UNPACK_FILENAME),0, O_RDWR | O_TRUNC,MYF(MY_WME))) < 0) @@ -588,7 +658,26 @@ int ha_archive::create(const char *name, TABLE *table_arg, error= my_errno; goto error; } - write_meta_file(create_file, 0, FALSE); + + for (uint key= 0; key < table_arg->s->keys; key++) + { + KEY *pos= table_arg->key_info+key; + KEY_PART_INFO *key_part= pos->key_part; + KEY_PART_INFO *key_part_end= key_part + pos->key_parts; + + for (; key_part != key_part_end; key_part++) + { + Field *field= key_part->field; + + if (!(field->flags & AUTO_INCREMENT_FLAG)) + { + error= -1; + goto error; + } + } + } + + write_meta_file(create_file, 0, auto_increment_value, 0, FALSE); my_close(create_file,MYF(0)); /* @@ -601,30 +690,28 @@ int ha_archive::create(const char *name, TABLE *table_arg, error= my_errno; goto error; } - if ((archive= gzdopen(create_file, "wb")) == NULL) + if (!azdopen(&archive, create_file, O_WRONLY|O_BINARY)) { error= errno; goto error2; } - if (write_data_header(archive)) + if (write_data_header(&archive)) { error= errno; goto error3; } - if (gzclose(archive)) + if (azclose(&archive)) { error= errno; goto error2; } - my_close(create_file, MYF(0)); - DBUG_RETURN(0); error3: - /* We already have an error, so ignore results of gzclose. */ - (void)gzclose(archive); + /* We already have an error, so ignore results of azclose. */ + (void)azclose(&archive); error2: my_close(create_file, MYF(0)); delete_table(name); @@ -636,18 +723,19 @@ error: /* This is where the actual row is written out. */ -int ha_archive::real_write_row(byte *buf, gzFile writer) +int ha_archive::real_write_row(byte *buf, azio_stream *writer) { - z_off_t written; + my_off_t written; uint *ptr, *end; DBUG_ENTER("ha_archive::real_write_row"); - written= gzwrite(writer, buf, table->s->reclength); - DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %d", written, table->s->reclength)); + written= azwrite(writer, buf, table->s->reclength); + DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %d", + written, table->s->reclength)); if (!delayed_insert || !bulk_insert) share->dirty= TRUE; - if (written != (z_off_t)table->s->reclength) + if (written != (my_off_t)table->s->reclength) DBUG_RETURN(errno ? 
errno : -1); /* We should probably mark the table as damaged if the record is written @@ -663,8 +751,8 @@ int ha_archive::real_write_row(byte *buf, gzFile writer) if (size) { ((Field_blob*) table->field[*ptr])->get_ptr(&data_ptr); - written= gzwrite(writer, data_ptr, (unsigned)size); - if (written != (z_off_t)size) + written= azwrite(writer, data_ptr, (unsigned)size); + if (written != (my_off_t)size) DBUG_RETURN(errno ? errno : -1); } } @@ -684,6 +772,8 @@ int ha_archive::real_write_row(byte *buf, gzFile writer) int ha_archive::write_row(byte *buf) { int rc; + byte *read_buf= NULL; + ulonglong temp_auto; DBUG_ENTER("ha_archive::write_row"); if (share->crashed) @@ -693,13 +783,188 @@ int ha_archive::write_row(byte *buf) if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) table->timestamp_field->set_time(); pthread_mutex_lock(&share->mutex); + + if (table->next_number_field) + { + KEY *mkey= &table->s->key_info[0]; // We only support one key right now + update_auto_increment(); + temp_auto= table->next_number_field->val_int(); + + /* + Bad news: this will cause a search for the unique value, which is very + expensive since we will have to do a table scan that will lock up + all other writers during this period. This could perhaps be optimized + in the future. + */ + if (temp_auto == share->auto_increment_value && + mkey->flags & HA_NOSAME) + { + rc= HA_ERR_FOUND_DUPP_KEY; + goto error; + } + + if (temp_auto < share->auto_increment_value && + mkey->flags & HA_NOSAME) + { + /* + First we create a buffer that we can use for reading rows, and can pass + to get_row(). + */ + if (!(read_buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME)))) + { + rc= HA_ERR_OUT_OF_MEM; + goto error; + } + /* + All of the buffer must be written out or we won't see all of the + data + */ + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + /* + Set the position of the local read thread to the beginning position. + */ + if (read_data_header(&archive)) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + /* + Now we read and check all of the rows. + if (!memcmp(table->next_number_field->ptr, mfield->ptr, mfield->max_length())) + if ((longlong)temp_auto == + mfield->val_int((char*)(read_buf + mfield->offset()))) + */ + Field *mfield= table->next_number_field; + + while (!(get_row(&archive, read_buf))) + { + if (!memcmp(read_buf + mfield->offset(), table->next_number_field->ptr, + mfield->max_length())) + { + rc= HA_ERR_FOUND_DUPP_KEY; + goto error; + } + } + } + else + { + if (temp_auto > share->auto_increment_value) + auto_increment_value= share->auto_increment_value= temp_auto; + } + } + + /* + Notice that the global auto_increment has been increased. + In case of a failed row write, we will never try to reuse the value.
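+ In outline, the AUTO_INCREMENT handling above amounts to (a summary of this function, not additional behaviour): update_auto_increment(); if temp_auto == share->auto_increment_value and the key is HA_NOSAME, fail with HA_ERR_FOUND_DUPP_KEY; if temp_auto < share->auto_increment_value and the key is HA_NOSAME, flush the writer and scan the whole compressed file for a duplicate; otherwise raise the shared high-water mark. This is why out-of-order inserts into the keyed column are so expensive for this engine.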
+ */ + share->rows_recorded++; - rc= real_write_row(buf, share->archive_write); + rc= real_write_row(buf, &(share->archive_write)); +error: pthread_mutex_unlock(&share->mutex); + if (read_buf) + my_free((gptr) read_buf, MYF(0)); + + DBUG_RETURN(rc); +} + + +ulonglong ha_archive::get_auto_increment() +{ + return share->auto_increment_value + 1; +} + +/* Initialized at each key walk (called multiple times unlike rnd_init()) */ +int ha_archive::index_init(uint keynr, bool sorted) +{ + DBUG_ENTER("ha_archive::index_init"); + active_index= keynr; + DBUG_RETURN(0); +} + +/* + No real indexes here, but since we tell the optimizer that we have + unique indexes it may still request an index search, in which case we scan +*/ +int ha_archive::index_read(byte *buf, const byte *key, + uint key_len, enum ha_rkey_function find_flag) +{ + int rc; + DBUG_ENTER("ha_archive::index_read"); + rc= index_read_idx(buf, active_index, key, key_len, find_flag); DBUG_RETURN(rc); } + +int ha_archive::index_read_idx(byte *buf, uint index, const byte *key, + uint key_len, enum ha_rkey_function find_flag) +{ + int rc= 0; + bool found= 0; + KEY *mkey= &table->s->key_info[index]; + current_k_offset= mkey->key_part->offset; + current_key= key; + current_key_len= key_len; + + + DBUG_ENTER("ha_archive::index_read_idx"); + + /* + All of the buffer must be written out or we won't see all of the + data + */ + pthread_mutex_lock(&share->mutex); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; + pthread_mutex_unlock(&share->mutex); + + /* + Set the position of the local read thread to the beginning position. + */ + if (read_data_header(&archive)) + { + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } + + while (!(get_row(&archive, buf))) + { + if (!memcmp(current_key, buf + current_k_offset, current_key_len)) + { + found= 1; + break; + } + } + + if (found) + DBUG_RETURN(0); + +error: + DBUG_RETURN(rc ? rc : HA_ERR_END_OF_FILE); +} + + +int ha_archive::index_next(byte * buf) +{ + bool found= 0; + + DBUG_ENTER("ha_archive::index_next"); + + while (!(get_row(&archive, buf))) + { + if (!memcmp(current_key, buf+current_k_offset, current_key_len)) + { + found= 1; + break; + } + } + + DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE); +} + /* All calls that need to scan the table start with this method. If we are told that it is a table scan we rewind the file to the beginning, otherwise @@ -722,7 +987,7 @@ int ha_archive::rnd_init(bool scan) { /* If dirty, we lock, and then reset/flush the data. - I found that just calling gzflush() doesn't always work. + I found that just calling azflush() doesn't always work. */ if (share->dirty == TRUE) { @@ -730,13 +995,14 @@ int ha_archive::rnd_init(bool scan) if (share->dirty == TRUE) { DBUG_PRINT("info", ("archive flushing out rows for scan")); - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; share->dirty= FALSE; } pthread_mutex_unlock(&share->mutex); } - if (read_data_header(archive)) + if (read_data_header(&archive)) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); } @@ -748,16 +1014,17 @@ int ha_archive::rnd_init(bool scan) This is the method that is used to read a row. It assumes that the row is positioned where you want it.
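A minimal sketch of the two call patterns used below (lifted from rnd_next() and rnd_pos(); names as in this file): for a sequential scan step, current_position= aztell(&archive); rc= get_row(&archive, buf); and for a positioned re-read, (void)azseek(&archive, current_position, SEEK_SET); rc= get_row(&archive, buf);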
*/ -int ha_archive::get_row(gzFile file_to_read, byte *buf) +int ha_archive::get_row(azio_stream *file_to_read, byte *buf) { - int read; // Bytes read, gzread() returns int + int read; // Bytes read, azread() returns int uint *ptr, *end; char *last; size_t total_blob_length= 0; DBUG_ENTER("ha_archive::get_row"); - read= gzread(file_to_read, buf, table->s->reclength); - DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %d", read, table->s->reclength)); + read= azread(file_to_read, buf, table->s->reclength); + DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %d", read, + table->s->reclength)); if (read == Z_STREAM_ERROR) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); @@ -777,7 +1044,10 @@ int ha_archive::get_row(gzFile file_to_read, byte *buf) for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; ptr != end ; ptr++) - total_blob_length += ((Field_blob*) table->field[*ptr])->get_length(); + { + if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr)) + total_blob_length += ((Field_blob*) table->field[*ptr])->get_length(); + } /* Adjust our row buffer if need be */ buffer.alloc(total_blob_length); @@ -791,11 +1061,18 @@ int ha_archive::get_row(gzFile file_to_read, byte *buf) size_t size= ((Field_blob*) table->field[*ptr])->get_length(); if (size) { - read= gzread(file_to_read, last, size); - if ((size_t) read != size) - DBUG_RETURN(HA_ERR_END_OF_FILE); - ((Field_blob*) table->field[*ptr])->set_ptr(size, last); - last += size; + if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr)) + { + read= azread(file_to_read, last, size); + if ((size_t) read != size) + DBUG_RETURN(HA_ERR_END_OF_FILE); + ((Field_blob*) table->field[*ptr])->set_ptr(size, last); + last += size; + } + else + { + (void)azseek(file_to_read, size, SEEK_CUR); + } } } DBUG_RETURN(0); @@ -821,8 +1098,8 @@ int ha_archive::rnd_next(byte *buf) statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status); - current_position= gztell(archive); - rc= get_row(archive, buf); + current_position= aztell(&archive); + rc= get_row(&archive, buf); if (rc != HA_ERR_END_OF_FILE) @@ -858,10 +1135,10 @@ int ha_archive::rnd_pos(byte * buf, byte *pos) DBUG_ENTER("ha_archive::rnd_pos"); statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status); - current_position= (z_off_t)my_get_ptr(pos, ref_length); - (void)gzseek(archive, current_position, SEEK_SET); + current_position= (my_off_t)my_get_ptr(pos, ref_length); + (void)azseek(&archive, current_position, SEEK_SET); - DBUG_RETURN(get_row(archive, buf)); + DBUG_RETURN(get_row(&archive, buf)); } /* @@ -890,17 +1167,18 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) { DBUG_ENTER("ha_archive::optimize"); int rc; - gzFile writer; + azio_stream writer; char writer_filename[FN_REFLEN]; /* Flush any waiting data */ - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; /* Let's create a file to contain the new data */ fn_format(writer_filename, share->table_name, "", ARN, MY_REPLACE_EXT|MY_UNPACK_FILENAME); - if ((writer= gzopen(writer_filename, "wb")) == NULL) + if (!(azopen(&writer, writer_filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY))) DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); /* @@ -910,6 +1188,7 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) if (check_opt->flags == T_EXTEND) { + DBUG_PRINT("info", ("archive extended rebuild")); byte *buf; /* @@ -926,14 +1205,14 @@ int ha_archive::optimize(THD* thd,
HA_CHECK_OPT* check_opt) Now we will rewind the archive file so that we are positioned at the start of the file. */ - rc= read_data_header(archive); + rc= read_data_header(&archive); /* Assuming no error from rewinding the archive file, we now write out the new header for our data file. */ if (!rc) - rc= write_data_header(writer); + rc= write_data_header(&writer); /* On success of writing out the new header, we now fetch each row and @@ -942,9 +1221,18 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) if (!rc) { share->rows_recorded= 0; - while (!(rc= get_row(archive, buf))) + auto_increment_value= share->auto_increment_value= 0; + while (!(rc= get_row(&archive, buf))) { - real_write_row(buf, writer); + real_write_row(buf, &writer); + if (table->found_next_number_field) + { + Field *field= table->found_next_number_field; + if (share->auto_increment_value < + field->val_int((char*)(buf + field->offset()))) + auto_increment_value= share->auto_increment_value= + field->val_int((char*)(buf + field->offset())); + } share->rows_recorded++; } } @@ -956,33 +1244,43 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) } else { + DBUG_PRINT("info", ("archive quick rebuild")); /* The quick method is to just read the data raw, and then compress it directly. */ - int read; // Bytes read, gzread() returns int + int read; // Bytes read, azread() returns int char block[IO_SIZE]; - if (gzrewind(archive) == -1) + if (azrewind(&archive) == -1) { rc= HA_ERR_CRASHED_ON_USAGE; + DBUG_PRINT("info", ("archive HA_ERR_CRASHED_ON_USAGE")); goto error; } - while ((read= gzread(archive, block, IO_SIZE))) - gzwrite(writer, block, read); + while ((read= azread(&archive, block, IO_SIZE)) > 0) + azwrite(&writer, block, read); } - gzflush(writer, Z_SYNC_FLUSH); + azclose(&writer); share->dirty= FALSE; - gzclose(share->archive_write); - share->archive_write= writer; + share->forced_flushes= 0; + azclose(&(share->archive_write)); + DBUG_PRINT("info", ("Reopening archive data file")); + if (!(azopen(&(share->archive_write), share->data_file_name, + O_WRONLY|O_APPEND|O_BINARY))) + { + DBUG_PRINT("info", ("Could not open archive write file")); + rc= HA_ERR_CRASHED_ON_USAGE; + goto error; + } my_rename(writer_filename,share->data_file_name,MYF(0)); /* Now we need to reopen our read descriptor since it has changed.
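To recap the rebuild above (a summary of this function, not extra behaviour): flush the shared writer, write a fresh compressed copy into the ARN temp file (row by row under T_EXTEND, recounting rows and the auto_increment high-water mark; raw azread()/azwrite() blocks otherwise), reopen the shared write handle, my_rename() the temp file over the data file, and finally reopen this handler's read descriptor.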
*/ - gzclose(archive); - if ((archive= gzopen(share->data_file_name, "rb")) == NULL) + azclose(&archive); + if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY))) { rc= HA_ERR_CRASHED_ON_USAGE; goto error; @@ -992,7 +1290,7 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt) DBUG_RETURN(0); error: - gzclose(writer); + azclose(&writer); DBUG_RETURN(rc); } @@ -1042,6 +1340,15 @@ THR_LOCK_DATA **ha_archive::store_lock(THD *thd, return to; } +void ha_archive::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_archive::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value=auto_increment_value; + } +} + /* Hints for optimizer, see ha_tina for more information @@ -1071,6 +1378,9 @@ void ha_archive::info(uint flag) delete_length= 0; index_file_length=0; + if (flag & HA_STATUS_AUTO) + auto_increment_value= share->auto_increment_value; + DBUG_VOID_RETURN; } @@ -1136,7 +1446,8 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) thd->proc_info= "Checking table"; /* Flush any waiting data */ - gzflush(share->archive_write, Z_SYNC_FLUSH); + azflush(&(share->archive_write), Z_SYNC_FLUSH); + share->forced_flushes++; /* First we create a buffer that we can use for reading rows, and can pass @@ -1150,10 +1461,10 @@ int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt) start of the file. */ if (!rc) - read_data_header(archive); + read_data_header(&archive); if (!rc) - while (!(rc= get_row(archive, buf))) + while (!(rc= get_row(&archive, buf))) count--; my_free((char*)buf, MYF(0)); @@ -1183,4 +1494,3 @@ bool ha_archive::check_and_repair(THD *thd) DBUG_RETURN(repair(thd, &check_opt)); } -#endif /* HAVE_ARCHIVE_DB */ diff --git a/sql/ha_archive.h b/sql/ha_archive.h index 0fa5cdc56ca..9b351b7e8da 100644 --- a/sql/ha_archive.h +++ b/sql/ha_archive.h @@ -19,6 +19,7 @@ #endif #include <zlib.h> +#include "../storage/archive/azlib.h" /* Please read ha_archive.cc first. If you are looking for more general @@ -33,32 +34,38 @@ typedef struct st_archive_share { pthread_mutex_t mutex; THR_LOCK lock; File meta_file; /* Meta file we use */ - gzFile archive_write; /* Archive file we are working with */ + azio_stream archive_write; /* Archive file we are working with */ bool dirty; /* Flag for if a flush should occur */ bool crashed; /* Meta file is crashed */ ha_rows rows_recorded; /* Number of rows in tables */ + ulonglong auto_increment_value; + ulonglong forced_flushes; + ulonglong mean_rec_length; } ARCHIVE_SHARE; /* Version for file format. 
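(This change bumps ARCHIVE_VERSION below to 2; judging from the hunks above, version 2 covers the switch to the azio data format plus the new auto_increment and forced_flushes meta-file fields.)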
1 - Initial Version */ -#define ARCHIVE_VERSION 1 +#define ARCHIVE_VERSION 2 class ha_archive: public handler { THR_LOCK_DATA lock; /* MySQL lock */ ARCHIVE_SHARE *share; /* Shared lock info */ - gzFile archive; /* Archive file we are working with */ - z_off_t current_position; /* The position of the row we just read */ + azio_stream archive; /* Archive file we are working with */ + my_off_t current_position; /* The position of the row we just read */ byte byte_buffer[IO_SIZE]; /* Initial buffer for our string */ String buffer; /* Buffer used for blob storage */ ha_rows scan_rows; /* Number of rows left in scan */ bool delayed_insert; /* If the insert is delayed */ bool bulk_insert; /* If we are performing a bulk insert */ + const byte *current_key; + uint current_key_len; + uint current_k_offset; public: - ha_archive(TABLE *table_arg); + ha_archive(TABLE_SHARE *table_arg); ~ha_archive() { } @@ -67,31 +74,47 @@ public: const char **bas_ext() const; ulong table_flags() const { - return (HA_REC_NOT_IN_SEQ | HA_NOT_EXACT_COUNT | HA_NO_AUTO_INCREMENT | + return (HA_REC_NOT_IN_SEQ | HA_NOT_EXACT_COUNT | HA_CAN_BIT_FIELD | HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY); } ulong index_flags(uint idx, uint part, bool all_parts) const { - return 0; + return HA_ONLY_WHOLE_INDEX; } + ulonglong get_auto_increment(); + uint max_supported_keys() const { return 1; } + uint max_supported_key_length() const { return sizeof(ulonglong); } + uint max_supported_key_part_length() const { return sizeof(ulonglong); } + int index_init(uint keynr, bool sorted); + virtual int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + virtual int index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_next(byte * buf); int open(const char *name, int mode, uint test_if_locked); int close(void); int write_row(byte * buf); - int real_write_row(byte *buf, gzFile writer); + int real_write_row(byte *buf, azio_stream *writer); int delete_all_rows(); int rnd_init(bool scan=1); int rnd_next(byte *buf); int rnd_pos(byte * buf, byte *pos); - int get_row(gzFile file_to_read, byte *buf); - int read_meta_file(File meta_file, ha_rows *rows); - int write_meta_file(File meta_file, ha_rows rows, bool dirty); + int get_row(azio_stream *file_to_read, byte *buf); + int read_meta_file(File meta_file, ha_rows *rows, + ulonglong *auto_increment, + ulonglong *forced_flushes); + int write_meta_file(File meta_file, ha_rows rows, + ulonglong auto_increment, + ulonglong forced_flushes, + bool dirty); ARCHIVE_SHARE *get_share(const char *table_name, TABLE *table, int *rc); int free_share(ARCHIVE_SHARE *share); bool auto_repair() const { return 1; } // For the moment we just do this - int read_data_header(gzFile file_to_read); - int write_data_header(gzFile file_to_write); + int read_data_header(azio_stream *file_to_read); + int write_data_header(azio_stream *file_to_write); void position(const byte *record); void info(uint); + void update_create_info(HA_CREATE_INFO *create_info); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); int optimize(THD* thd, HA_CHECK_OPT* check_opt); int repair(THD* thd, HA_CHECK_OPT* check_opt); @@ -109,5 +132,5 @@ public: }; bool archive_db_init(void); -bool archive_db_end(void); +int archive_db_end(ha_panic_function type); diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 72af402a0dc..910a703fdeb 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -53,7 +53,6 @@ #include 
"mysql_priv.h" -#ifdef HAVE_BERKELEY_DB #include <m_ctype.h> #include <myisampack.h> #include <hash.h> @@ -72,13 +71,21 @@ #define STATUS_ROW_COUNT_INIT 2 #define STATUS_BDB_ANALYZE 4 +const u_int32_t bdb_DB_TXN_NOSYNC= DB_TXN_NOSYNC; +const u_int32_t bdb_DB_RECOVER= DB_RECOVER; +const u_int32_t bdb_DB_PRIVATE= DB_PRIVATE; +const u_int32_t bdb_DB_DIRECT_DB= DB_DIRECT_DB; +const u_int32_t bdb_DB_DIRECT_LOG= DB_DIRECT_LOG; const char *ha_berkeley_ext=".db"; bool berkeley_shared_data=0; -u_int32_t berkeley_init_flags= DB_PRIVATE | DB_RECOVER, berkeley_env_flags=0, - berkeley_lock_type=DB_LOCK_DEFAULT; -ulong berkeley_cache_size, berkeley_log_buffer_size, berkeley_log_file_size=0; +u_int32_t berkeley_init_flags= DB_PRIVATE | DB_RECOVER, + berkeley_env_flags= DB_LOG_AUTOREMOVE, + berkeley_lock_type= DB_LOCK_DEFAULT; +ulong berkeley_log_buffer_size=0 , berkeley_log_file_size=0; +ulonglong berkeley_cache_size= 0; char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; long berkeley_lock_scan_time=0; +ulong berkeley_region_size=0, berkeley_cache_parts=1; ulong berkeley_trans_retry=1; ulong berkeley_max_lock; pthread_mutex_t bdb_mutex; @@ -87,13 +94,17 @@ static DB_ENV *db_env; static HASH bdb_open_tables; const char *berkeley_lock_names[] = -{ "DEFAULT", "OLDEST","RANDOM","YOUNGEST",0 }; +{ "DEFAULT", "OLDEST", "RANDOM", "YOUNGEST", "EXPIRE", "MAXLOCKS", + "MAXWRITE", "MINLOCKS", "MINWRITE", 0 }; u_int32_t berkeley_lock_types[]= -{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM }; +{ DB_LOCK_DEFAULT, DB_LOCK_OLDEST, DB_LOCK_RANDOM, DB_LOCK_YOUNGEST, + DB_LOCK_EXPIRE, DB_LOCK_MAXLOCKS, DB_LOCK_MAXWRITE, DB_LOCK_MINLOCKS, + DB_LOCK_MINWRITE }; TYPELIB berkeley_lock_typelib= {array_elements(berkeley_lock_names)-1,"", berkeley_lock_names, NULL}; -static void berkeley_print_error(const char *db_errpfx, char *buffer); +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer); static byte* bdb_get_key(BDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); static BDB_SHARE *get_share(const char *table_name, TABLE *table); @@ -101,24 +112,28 @@ static int free_share(BDB_SHARE *share, TABLE *table, uint hidden_primary_key, bool mutex_is_locked); static int write_status(DB *status_block, char *buff, uint length); static void update_status(BDB_SHARE *share, TABLE *table); -static void berkeley_noticecall(DB_ENV *db_env, db_notices notice); static int berkeley_close_connection(THD *thd); static int berkeley_commit(THD *thd, bool all); static int berkeley_rollback(THD *thd, bool all); +static int berkeley_rollback_to_savepoint(THD* thd, void *savepoint); +static int berkeley_savepoint(THD* thd, void *savepoint); +static int berkeley_release_savepoint(THD* thd, void *savepoint); +static handler *berkeley_create_handler(TABLE_SHARE *table); handlerton berkeley_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "BerkeleyDB", SHOW_OPTION_YES, "Supports transactions and page-level locking", DB_TYPE_BERKELEY_DB, berkeley_init, 0, /* slot */ - 0, /* savepoint size */ + sizeof(DB_TXN *), /* savepoint size */ berkeley_close_connection, - NULL, /* savepoint_set */ - NULL, /* savepoint_rollback */ - NULL, /* savepoint_release */ + berkeley_savepoint, /* savepoint_set */ + berkeley_rollback_to_savepoint, /* savepoint_rollback */ + berkeley_release_savepoint, /* savepoint_release */ berkeley_commit, berkeley_rollback, NULL, /* prepare */ @@ -128,12 +143,30 @@ handlerton berkeley_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* 
close_cursor_read_view */ - HTON_CLOSE_CURSORS_AT_COMMIT + berkeley_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + berkeley_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + berkeley_flush_logs, /* Flush logs */ + berkeley_show_status, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill Files Table */ + HTON_CLOSE_CURSORS_AT_COMMIT | HTON_FLUSH_AFTER_RENAME, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; +handler *berkeley_create_handler(TABLE_SHARE *table) +{ + return new ha_berkeley(table); +} + typedef struct st_berkeley_trx_data { DB_TXN *all; DB_TXN *stmt; + DB_TXN *sp_level; uint bdb_lock_count; } berkeley_trx_data; @@ -174,7 +207,6 @@ bool berkeley_init(void) goto error; db_env->set_errcall(db_env,berkeley_print_error); db_env->set_errpfx(db_env,"bdb"); - db_env->set_noticecall(db_env, berkeley_noticecall); db_env->set_tmp_dir(db_env, berkeley_tmpdir); db_env->set_data_dir(db_env, mysql_data_home); db_env->set_flags(db_env, berkeley_env_flags, 1); @@ -183,13 +215,20 @@ bool berkeley_init(void) if (opt_endinfo) db_env->set_verbose(db_env, - DB_VERB_CHKPOINT | DB_VERB_DEADLOCK | DB_VERB_RECOVERY, + DB_VERB_DEADLOCK | DB_VERB_RECOVERY, 1); - db_env->set_cachesize(db_env, 0, berkeley_cache_size, 0); + if (berkeley_cache_size > (uint) ~0) + db_env->set_cachesize(db_env, berkeley_cache_size / (1024*1024L*1024L), + berkeley_cache_size % (1024L*1024L*1024L), + berkeley_cache_parts); + else + db_env->set_cachesize(db_env, 0, berkeley_cache_size, berkeley_cache_parts); + db_env->set_lg_max(db_env, berkeley_log_file_size); db_env->set_lg_bsize(db_env, berkeley_log_buffer_size); db_env->set_lk_detect(db_env, berkeley_lock_type); + db_env->set_lg_regionmax(db_env, berkeley_region_size); if (berkeley_max_lock) db_env->set_lk_max(db_env, berkeley_max_lock); @@ -214,18 +253,19 @@ error: } -bool berkeley_end(void) +int berkeley_end(ha_panic_function type) { - int error; + int error= 0; DBUG_ENTER("berkeley_end"); - if (!db_env) - return 1; /* purecov: tested */ - berkeley_cleanup_log_files(); - error=db_env->close(db_env,0); // Error is logged - db_env=0; - hash_free(&bdb_open_tables); - pthread_mutex_destroy(&bdb_mutex); - DBUG_RETURN(error != 0); + if (db_env) + { + berkeley_cleanup_log_files(); + error= db_env->close(db_env,0); // Error is logged + db_env= 0; + hash_free(&bdb_open_tables); + pthread_mutex_destroy(&bdb_mutex); + } + DBUG_RETURN(error); } static int berkeley_close_connection(THD *thd) @@ -258,7 +298,7 @@ static int berkeley_commit(THD *thd, bool all) DBUG_PRINT("trans",("ending transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? &trx->all : &trx->stmt; - int error=txn_commit(*txn,0); + int error= (*txn)->commit(*txn,0); *txn=0; #ifndef DBUG_OFF if (error) @@ -273,15 +313,58 @@ static int berkeley_rollback(THD *thd, bool all) DBUG_PRINT("trans",("aborting transaction %s", all ? "all" : "stmt")); berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DB_TXN **txn= all ? 
&trx->all : &trx->stmt; - int error=txn_abort(*txn); + int error= (*txn)->abort(*txn); *txn=0; DBUG_RETURN(error); } +static int berkeley_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + if (!(error= db_env->txn_begin(db_env, trx->sp_level, save_txn, 0))) + { + trx->sp_level= *save_txn; + } + DBUG_RETURN(error); +} + +static int berkeley_rollback_to_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN *parent, **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_rollback_to_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + parent= (*save_txn)->parent; + if (!(error= (*save_txn)->abort(*save_txn))) + { + trx->sp_level= parent; + error= berkeley_savepoint(thd, savepoint); + } + DBUG_RETURN(error); +} + +static int berkeley_release_savepoint(THD* thd, void *savepoint) +{ + int error; + DB_TXN *parent, **save_txn= (DB_TXN**) savepoint; + DBUG_ENTER("berkeley_release_savepoint"); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + parent= (*save_txn)->parent; + if (!(error= (*save_txn)->commit(*save_txn,0))) + { + trx->sp_level= parent; + *save_txn= 0; + } + DBUG_RETURN(error); +} -int berkeley_show_logs(Protocol *protocol) +static bool berkeley_show_logs(THD *thd, stat_print_fn *stat_print) { char **all_logs, **free_logs, **a, **f; + uint hton_name_len= strlen(berkeley_hton.name); int error=1; MEM_ROOT **root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**,THR_MALLOC); MEM_ROOT show_logs_root, *old_mem_root= *root_ptr; @@ -306,21 +389,20 @@ int berkeley_show_logs(Protocol *protocol) { for (a = all_logs, f = free_logs; *a; ++a) { - protocol->prepare_for_resend(); - protocol->store(*a, system_charset_info); - protocol->store(STRING_WITH_LEN("BDB"), system_charset_info); if (f && *f && strcmp(*a, *f) == 0) { - f++; - protocol->store(SHOW_LOG_STATUS_FREE, system_charset_info); + f++; + if ((error= stat_print(thd, berkeley_hton.name, hton_name_len, + *a, strlen(*a), + STRING_WITH_LEN(SHOW_LOG_STATUS_FREE)))) + break; } else - protocol->store(SHOW_LOG_STATUS_INUSE, system_charset_info); - - if (protocol->write()) { - error=1; - goto err; + if ((error= stat_print(thd, berkeley_hton.name, hton_name_len, + *a, strlen(*a), + STRING_WITH_LEN(SHOW_LOG_STATUS_INUSE)))) + break; } } } @@ -330,26 +412,24 @@ err: DBUG_RETURN(error); } - -static void berkeley_print_error(const char *db_errpfx, char *buffer) +bool berkeley_show_status(THD *thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) { - sql_print_error("%s: %s",db_errpfx,buffer); /* purecov: tested */ + switch (stat_type) { + case HA_ENGINE_LOGS: + return berkeley_show_logs(thd, stat_print); + default: + return FALSE; + } } - -static void berkeley_noticecall(DB_ENV *db_env, db_notices notice) +static void berkeley_print_error(const DB_ENV *db_env, const char *db_errpfx, + const char *buffer) { - switch (notice) - { - case DB_NOTICE_LOGFILE_CHANGED: /* purecov: tested */ - pthread_mutex_lock(&LOCK_manager); - manager_status |= MANAGER_BERKELEY_LOG_CLEANUP; - pthread_mutex_unlock(&LOCK_manager); - pthread_cond_signal(&COND_manager); - break; - } + sql_print_error("%s: %s",db_errpfx,buffer); /* purecov: tested */ } + void berkeley_cleanup_log_files(void) { DBUG_ENTER("berkeley_cleanup_log_files"); @@ -387,7 +467,7 @@ void berkeley_cleanup_log_files(void) ** Berkeley DB tables 
*****************************************************************************/ -ha_berkeley::ha_berkeley(TABLE *table_arg) +ha_berkeley::ha_berkeley(TABLE_SHARE *table_arg) :handler(&berkeley_hton, table_arg), alloc_ptr(0), rec_buff(0), file(0), int_table_flags(HA_REC_NOT_IN_SEQ | HA_FAST_KEY_READ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_NOT_EXACT_COUNT | @@ -414,13 +494,14 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const | HA_READ_RANGE); for (uint i= all_parts ? 0 : part ; i <= part ; i++) { - if (table->key_info[idx].key_part[i].field->type() == FIELD_TYPE_BLOB) + KEY_PART_INFO *key_part= table_share->key_info[idx].key_part+i; + if (key_part->field->type() == FIELD_TYPE_BLOB) { /* We can't use BLOBS to shortcut sorts */ flags&= ~(HA_READ_ORDER | HA_KEYREAD_ONLY | HA_READ_RANGE); break; } - switch (table->key_info[idx].key_part[i].field->key_type()) { + switch (key_part->field->key_type()) { case HA_KEYTYPE_TEXT: case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARTEXT2: @@ -428,8 +509,7 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const As BDB stores only one copy of equal strings, we can't use key read on these. Binary collations do support key read though. */ - if (!(table->key_info[idx].key_part[i].field->charset()->state - & MY_CS_BINSORT)) + if (!(key_part->field->charset()->state & MY_CS_BINSORT)) flags&= ~HA_KEYREAD_ONLY; break; default: // Keep compiler happy @@ -558,7 +638,6 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) uint open_mode=(mode == O_RDONLY ? DB_RDONLY : 0) | DB_THREAD; uint max_key_length; int error; - TABLE_SHARE *table_share= table->s; DBUG_ENTER("ha_berkeley::open"); /* Open primary key */ @@ -580,7 +659,7 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) &key_buff2, max_key_length, &primary_key_buff, (hidden_primary_key ? 0 : - table->key_info[table_share->primary_key].key_length), + table_share->key_info[table_share->primary_key].key_length), NullS))) DBUG_RETURN(1); /* purecov: inspected */ if (!(rec_buff= (byte*) my_malloc((alloced_rec_buff_length= @@ -623,10 +702,10 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) berkeley_cmp_packed_key)); if (!hidden_primary_key) file->app_private= (void*) (table->key_info + table_share->primary_key); - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, 0)) || (error= (file->open(file, transaction, fn_format(name_buff, name, "", ha_berkeley_ext, - 2 | 4), + MY_UNPACK_FILENAME|MY_APPEND_EXT), "main", DB_BTREE, open_mode, 0))) || (error= transaction->commit(transaction, 0))) { @@ -639,7 +718,7 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) /* Open other keys; These are part of the share structure */ key_file[primary_key]=file; - key_type[primary_key]=DB_NOOVERWRITE; + key_type[primary_key]= hidden_primary_key ? 
0 : DB_NOOVERWRITE; DB **ptr=key_file; for (uint i=0, used_keys=0; i < table_share->keys ; i++, ptr++) @@ -662,7 +741,8 @@ int ha_berkeley::open(const char *name, int mode, uint test_if_locked) DBUG_PRINT("bdb",("Setting DB_DUP for key %u", i)); (*ptr)->set_flags(*ptr, DB_DUP); } - if ((error= txn_begin(db_env, 0, (DB_TXN**) &transaction, 0)) || + if ((error= db_env->txn_begin(db_env, NULL, (DB_TXN**) &transaction, + 0)) || (error=((*ptr)->open(*ptr, transaction, name_buff, part, DB_BTREE, open_mode, 0))) || (error= transaction->commit(transaction, 0))) @@ -736,9 +816,9 @@ bool ha_berkeley::fix_rec_buff_for_blob(ulong length) ulong ha_berkeley::max_row_length(const byte *buf) { - ulong length= table->s->reclength + table->s->fields*2; + ulong length= table_share->reclength + table_share->fields*2; uint *ptr, *end; - for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ; + for (ptr= table_share->blob_field, end=ptr + table_share->blob_fields ; ptr != end ; ptr++) { @@ -765,25 +845,26 @@ int ha_berkeley::pack_row(DBT *row, const byte *record, bool new_row) if (share->fixed_length_row) { row->data=(void*) record; - row->size= table->s->reclength+hidden_primary_key; + row->size= table_share->reclength+hidden_primary_key; if (hidden_primary_key) { if (new_row) get_auto_primary_key(current_ident); - memcpy_fixed((char*) record+table->s->reclength, (char*) current_ident, + memcpy_fixed((char*) record+table_share->reclength, + (char*) current_ident, BDB_HIDDEN_PRIMARY_KEY_LENGTH); } return 0; } - if (table->s->blob_fields) + if (table_share->blob_fields) { if (fix_rec_buff_for_blob(max_row_length(record))) return HA_ERR_OUT_OF_MEM; /* purecov: inspected */ } /* Copy null bits */ - memcpy(rec_buff, record, table->s->null_bytes); - ptr= rec_buff + table->s->null_bytes; + memcpy(rec_buff, record, table_share->null_bytes); + ptr= rec_buff + table_share->null_bytes; for (Field **field=table->field ; *field ; field++) ptr=(byte*) (*field)->pack((char*) ptr, @@ -806,13 +887,13 @@ int ha_berkeley::pack_row(DBT *row, const byte *record, bool new_row) void ha_berkeley::unpack_row(char *record, DBT *row) { if (share->fixed_length_row) - memcpy(record,(char*) row->data,table->s->reclength+hidden_primary_key); + memcpy(record,(char*) row->data,table_share->reclength+hidden_primary_key); else { /* Copy null bits */ const char *ptr= (const char*) row->data; - memcpy(record, ptr, table->s->null_bytes); - ptr+= table->s->null_bytes; + memcpy(record, ptr, table_share->null_bytes); + ptr+= table_share->null_bytes; for (Field **field=table->field ; *field ; field++) ptr= (*field)->unpack(record + (*field)->offset(), ptr); } @@ -958,7 +1039,7 @@ int ha_berkeley::write_row(byte * record) DBUG_RETURN(error); /* purecov: inspected */ table->insert_or_update= 1; // For handling of VARCHAR - if (table->s->keys + test(hidden_primary_key) == 1) + if (table_share->keys + test(hidden_primary_key) == 1) { error=file->put(file, transaction, create_key(&prim_key, primary_key, key_buff, record), @@ -977,7 +1058,7 @@ int ha_berkeley::write_row(byte * record) &row, key_type[primary_key]))) { changed_keys.set_bit(primary_key); - for (uint keynr=0 ; keynr < table->s->keys ; keynr++) + for (uint keynr=0 ; keynr < table_share->keys ; keynr++) { if (keynr == primary_key) continue; @@ -1005,7 +1086,7 @@ int ha_berkeley::write_row(byte * record) { new_error = 0; for (uint keynr=0; - keynr < table->s->keys+test(hidden_primary_key); + keynr < table_share->keys+test(hidden_primary_key); keynr++) { if (changed_keys.is_set(keynr)) @@ 
-1148,7 +1229,7 @@ int ha_berkeley::restore_keys(DB_TXN *trans, key_map *changed_keys, that one just put back the old value. */ if (!changed_keys->is_clear_all()) { - for (keynr=0 ; keynr < table->s->keys+test(hidden_primary_key) ; keynr++) + for (keynr=0 ; keynr < table_share->keys+test(hidden_primary_key) ; keynr++) { if (changed_keys->is_set(keynr)) { @@ -1213,7 +1294,7 @@ int ha_berkeley::update_row(const byte * old_row, byte * new_row) using_ignore))) { // Update all other keys - for (uint keynr=0 ; keynr < table->s->keys ; keynr++) + for (uint keynr=0 ; keynr < table_share->keys ; keynr++) { if (keynr == primary_key) continue; @@ -1325,7 +1406,7 @@ int ha_berkeley::remove_keys(DB_TXN *trans, const byte *record, { int result = 0; for (uint keynr=0; - keynr < table->s->keys+test(hidden_primary_key); + keynr < table_share->keys+test(hidden_primary_key); keynr++) { if (keys->is_set(keynr)) @@ -1346,7 +1427,7 @@ int ha_berkeley::delete_row(const byte * record) { int error; DBT row, prim_key; - key_map keys= table->s->keys_in_use; + key_map keys= table_share->keys_in_use; DBUG_ENTER("delete_row"); statistic_increment(table->in_use->status_var.ha_delete_count,&LOCK_status); @@ -1378,11 +1459,12 @@ int ha_berkeley::delete_row(const byte * record) } -int ha_berkeley::index_init(uint keynr) +int ha_berkeley::index_init(uint keynr, bool sorted) { int error; DBUG_ENTER("ha_berkeley::index_init"); - DBUG_PRINT("enter",("table: '%s' key: %d", table->s->table_name, keynr)); + DBUG_PRINT("enter",("table: '%s' key: %d", table_share->table_name.str, + keynr)); /* Under some very rare conditions (like full joins) we may already have @@ -1409,7 +1491,7 @@ int ha_berkeley::index_end() { DBUG_ENTER("ha_berkeley::index_end"); if (cursor) { - DBUG_PRINT("enter",("table: '%s'", table->s->table_name)); + DBUG_PRINT("enter",("table: '%s'", table_share->table_name.str)); error=cursor->c_close(cursor); cursor=0; } @@ -1656,7 +1738,7 @@ int ha_berkeley::rnd_init(bool scan) { DBUG_ENTER("rnd_init"); current_row.flags=DB_DBT_REALLOC; - DBUG_RETURN(index_init(primary_key)); + DBUG_RETURN(index_init(primary_key, 0)); } int ha_berkeley::rnd_end() @@ -1764,14 +1846,14 @@ void ha_berkeley::info(uint flag) if ((flag & HA_STATUS_CONST) || version != share->version) { version=share->version; - for (uint i=0 ; i < table->s->keys ; i++) + for (uint i=0 ; i < table_share->keys ; i++) { table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= share->rec_per_key[i]; } } /* Don't return key if we got an error for the internal primary key */ - if (flag & HA_STATUS_ERRKEY && last_dup_key < table->s->keys) + if (flag & HA_STATUS_ERRKEY && last_dup_key < table_share->keys) errkey= last_dup_key; DBUG_VOID_RETURN; } @@ -1842,6 +1924,8 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) if (!trx) DBUG_RETURN(1); } + if (trx->all == 0) + trx->sp_level= 0; if (lock_type != F_UNLCK) { if (!trx->bdb_lock_count++) @@ -1855,17 +1939,18 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) /* We have to start a master transaction */ DBUG_PRINT("trans",("starting transaction all: options: 0x%lx", (ulong) thd->options)); - if ((error=txn_begin(db_env, 0, &trx->all, 0))) + if ((error= db_env->txn_begin(db_env, NULL, &trx->all, 0))) { trx->bdb_lock_count--; // We didn't get the lock DBUG_RETURN(error); } + trx->sp_level= trx->all; trans_register_ha(thd, TRUE, &berkeley_hton); if (thd->in_lock_tables) DBUG_RETURN(0); // Don't create stmt trans } DBUG_PRINT("trans",("starting transaction stmt")); - if ((error=txn_begin(db_env,
trx->all, &trx->stmt, 0))) + if ((error= db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, 0))) { /* We leave the possible master transaction open */ trx->bdb_lock_count--; // We didn't get the lock @@ -1890,7 +1975,7 @@ int ha_berkeley::external_lock(THD *thd, int lock_type) We must in this case commit the work to keep the row locks */ DBUG_PRINT("trans",("committing non-updating transaction")); - error= txn_commit(trx->stmt,0); + error= trx->stmt->commit(trx->stmt,0); trx->stmt= transaction= 0; } } @@ -1919,7 +2004,7 @@ int ha_berkeley::start_stmt(THD *thd, thr_lock_type lock_type) if (!trx->stmt) { DBUG_PRINT("trans",("starting transaction stmt")); - error=txn_begin(db_env, trx->all, &trx->stmt, 0); + error= db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, 0); trans_register_ha(thd, FALSE, &berkeley_hton); } transaction= trx->stmt; @@ -2014,13 +2099,14 @@ int ha_berkeley::create(const char *name, register TABLE *form, int error; DBUG_ENTER("ha_berkeley::create"); - fn_format(name_buff,name,"", ha_berkeley_ext,2 | 4); + fn_format(name_buff,name,"", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); /* Create the main table that will hold the real rows */ if ((error= create_sub_table(name_buff,"main",DB_BTREE,0))) DBUG_RETURN(error); /* purecov: inspected */ - primary_key= table->s->primary_key; + primary_key= form->s->primary_key; /* Create the keys */ for (uint i=0; i < form->s->keys; i++) { @@ -2028,7 +2114,7 @@ int ha_berkeley::create(const char *name, register TABLE *form, { sprintf(part,"key%02d",index++); if ((error= create_sub_table(name_buff, part, DB_BTREE, - (table->key_info[i].flags & HA_NOSAME) ? 0 : + (form->key_info[i].flags & HA_NOSAME) ? 0 : DB_DUP))) DBUG_RETURN(error); /* purecov: inspected */ } @@ -2044,7 +2130,7 @@ int ha_berkeley::create(const char *name, register TABLE *form, "status", DB_BTREE, DB_CREATE, 0)))) { char rec_buff[4+MAX_KEY*4]; - uint length= 4+ table->s->keys*4; + uint length= 4+ form->s->keys*4; bzero(rec_buff, length); error= write_status(status_block, rec_buff, length); status_block->close(status_block,0); @@ -2063,8 +2149,9 @@ int ha_berkeley::delete_table(const char *name) if ((error=db_create(&file, db_env, 0))) my_errno=error; /* purecov: inspected */ else - error=file->remove(file,fn_format(name_buff,name,"",ha_berkeley_ext,2 | 4), - NULL,0); + error=file->remove(file,fn_format(name_buff,name,"",ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), + NULL,0); file=0; // Safety DBUG_RETURN(error); } @@ -2082,9 +2169,11 @@ int ha_berkeley::rename_table(const char * from, const char * to) { /* One should not do a file->close() after rename returns */ error= file->rename(file, - fn_format(from_buff, from, "", ha_berkeley_ext, 2 | 4), + fn_format(from_buff, from, "", + ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), NULL, fn_format(to_buff, to, "", ha_berkeley_ext, - 2 | 4), 0); + MY_UNPACK_FILENAME|MY_APPEND_EXT), 0); } return error; } @@ -2164,9 +2253,9 @@ ulonglong ha_berkeley::get_auto_increment() (void) ha_berkeley::extra(HA_EXTRA_KEYREAD); /* Set 'active_index' */ - ha_berkeley::index_init(table->s->next_number_index); + ha_berkeley::index_init(table_share->next_number_index, 0); - if (!table->s->next_number_key_offset) + if (!table_share->next_number_key_offset) { // Autoincrement at key-start error=ha_berkeley::index_last(table->record[1]); } @@ -2179,7 +2268,7 @@ ulonglong ha_berkeley::get_auto_increment() /* Reading next available number for a sub key */ ha_berkeley::create_key(&last_key, active_index, key_buff, table->record[0],
table->record[0], - table->s->next_number_key_offset); + table_share->next_number_key_offset); /* Store for compare */ memcpy(old_key.data=key_buff2, key_buff, (old_key.size=last_key.size)); old_key.app_private=(void*) key_info; @@ -2209,7 +2298,7 @@ ulonglong ha_berkeley::get_auto_increment() } if (!error) nr= (ulonglong) - table->next_number_field->val_int_offset(table->s->rec_buff_length)+1; + table->next_number_field->val_int_offset(table_share->rec_buff_length)+1; ha_berkeley::index_end(); (void) ha_berkeley::extra(HA_EXTRA_NO_KEYREAD); return nr; @@ -2259,48 +2348,14 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DBUG_ASSERT(trx); - /* - Original bdb documentation says: - "The DB->stat method cannot be transaction-protected. - For this reason, it should be called in a thread of - control that has no open cursors or active transactions." - So, let's check if there are any changes have been done since - the beginning of the transaction.. - */ - - if (!db_env->txn_stat(db_env, &txn_stat_ptr, 0) && - txn_stat_ptr && txn_stat_ptr->st_nactive>=2) - { - DB_TXN_ACTIVE *atxn_stmt= 0, *atxn_all= 0; - - u_int32_t all_id= trx->all->id(trx->all); - u_int32_t stmt_id= trx->stmt->id(trx->stmt); - - DB_TXN_ACTIVE *cur= txn_stat_ptr->st_txnarray; - DB_TXN_ACTIVE *end= cur + txn_stat_ptr->st_nactive; - for (; cur!=end && (!atxn_stmt || !atxn_all); cur++) - { - if (cur->txnid==all_id) atxn_all= cur; - if (cur->txnid==stmt_id) atxn_stmt= cur; - } - - if (atxn_stmt && atxn_all && - log_compare(&atxn_stmt->lsn,&atxn_all->lsn)) - { - free(txn_stat_ptr); - return HA_ADMIN_REJECT; - } - free(txn_stat_ptr); - } - - for (i=0 ; i < table->s->keys ; i++) + for (i=0 ; i < table_share->keys ; i++) { if (stat) { free(stat); stat=0; } - if ((key_file[i]->stat)(key_file[i], (void*) &stat, 0)) + if ((key_file[i]->stat)(key_file[i], trx->all, (void*) &stat, 0)) goto err; /* purecov: inspected */ share->rec_per_key[i]= (stat->bt_ndata / (stat->bt_nkeys ? stat->bt_nkeys : 1)); @@ -2313,7 +2368,7 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) free(stat); stat=0; } - if ((file->stat)(file, (void*) &stat, 0)) + if ((file->stat)(file, trx->all, (void*) &stat, 0)) goto err; /* purecov: inspected */ } pthread_mutex_lock(&share->mutex); @@ -2368,7 +2423,8 @@ int ha_berkeley::check(THD* thd, HA_CHECK_OPT* check_opt) (hidden_primary_key ? berkeley_cmp_hidden_key : berkeley_cmp_packed_key)); tmp_file->app_private= (void*) (table->key_info+table->primary_key); - fn_format(name_buff,share->table_name,"", ha_berkeley_ext, 2 | 4); + fn_format(name_buff,share->table_name.str,"", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); if ((error=tmp_file->verify(tmp_file, name_buff, NullS, (FILE*) 0, hidden_primary_key ? 
0 : DB_NOORDERCHK))) { @@ -2442,7 +2498,7 @@ static BDB_SHARE *get_share(const char *table_name, TABLE *table) share->rec_per_key = rec_per_key; share->table_name = tmp_name; share->table_name_length=length; - strmov(share->table_name,table_name); + strmov(share->table_name, table_name); share->key_file = key_file; share->key_type = key_type; if (my_hash_insert(&bdb_open_tables, (byte*) share)) @@ -2503,7 +2559,7 @@ void ha_berkeley::get_status() if (!(share->status & STATUS_PRIMARY_KEY_INIT)) { (void) extra(HA_EXTRA_KEYREAD); - index_init(primary_key); + index_init(primary_key, 0); if (!index_last(table->record[1])) share->auto_ident=uint5korr(current_ident); index_end(); @@ -2514,7 +2570,8 @@ void ha_berkeley::get_status() char name_buff[FN_REFLEN]; uint open_mode= (((table->db_stat & HA_READ_ONLY) ? DB_RDONLY : 0) | DB_THREAD); - fn_format(name_buff, share->table_name,"", ha_berkeley_ext, 2 | 4); + fn_format(name_buff, share->table_name, "", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT); if (!db_create(&share->status_block, db_env, 0)) { if (share->status_block->open(share->status_block, NULL, name_buff, @@ -2528,7 +2585,7 @@ void ha_berkeley::get_status() if (!(share->status & STATUS_ROW_COUNT_INIT) && share->status_block) { share->org_rows= share->rows= - table->s->max_rows ? table->s->max_rows : HA_BERKELEY_MAX_ROWS; + table_share->max_rows ? table_share->max_rows : HA_BERKELEY_MAX_ROWS; if (!share->status_block->cursor(share->status_block, 0, &cursor, 0)) { DBT row; @@ -2543,7 +2600,7 @@ void ha_berkeley::get_status() uint i; uchar *pos=(uchar*) row.data; share->org_rows=share->rows=uint4korr(pos); pos+=4; - for (i=0 ; i < table->s->keys ; i++) + for (i=0 ; i < table_share->keys ; i++) { share->rec_per_key[i]=uint4korr(pos); pos+=4; @@ -2595,8 +2652,9 @@ static void update_status(BDB_SHARE *share, TABLE *table) goto end; /* purecov: inspected */ share->status_block->set_flags(share->status_block,0); /* purecov: inspected */ if (share->status_block->open(share->status_block, NULL, - fn_format(name_buff,share->table_name,"", - ha_berkeley_ext,2 | 4), + fn_format(name_buff,share->table_name, + "", ha_berkeley_ext, + MY_UNPACK_FILENAME|MY_APPEND_EXT), "status", DB_BTREE, DB_THREAD | DB_CREATE, my_umask)) /* purecov: inspected */ goto end; /* purecov: inspected */ @@ -2608,7 +2666,7 @@ static void update_status(BDB_SHARE *share, TABLE *table) { int4store(pos,share->rec_per_key[i]); pos+=4; } - DBUG_PRINT("info",("updating status for %s",share->table_name)); + DBUG_PRINT("info",("updating status for %s", share->table_name)); (void) write_status(share->status_block, rec_buff, (uint) (pos-rec_buff)); share->status&= ~STATUS_BDB_ANALYZE; @@ -2638,7 +2696,7 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) int result; Field *field; - KEY *key_info=table->key_info+table->s->primary_key; + KEY *key_info=table->key_info+table_share->primary_key; KEY_PART_INFO *key_part=key_info->key_part; KEY_PART_INFO *end=key_part+key_info->key_parts; @@ -2656,4 +2714,13 @@ int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) return 0; } -#endif /* HAVE_BERKELEY_DB */ + +bool ha_berkeley::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + if (table_changes < IS_EQUAL_YES) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + + diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 16e4db59c10..21b618b8d6d 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -84,7 +84,7 @@ class ha_berkeley: public handler DBT *get_pos(DBT *to, byte *pos); 
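/*
  Editor's note (not part of the patch): the constructor change just below
  is one instance of a tree-wide refactoring in this commit.  Handlers are
  now constructed from a TABLE_SHARE via the handlerton's create() factory
  (see the blackhole_create_handler/federated_create_handler hunks further
  on), so a handler can exist before any TABLE object does; this is also
  why the .cc hunks above rewrite table->s accesses to the table_share
  member inherited from class handler.
*/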
public: - ha_berkeley(TABLE *table_arg); + ha_berkeley(TABLE_SHARE *table_arg); ~ha_berkeley() {} const char *table_type() const { return "BerkeleyDB"; } ulong index_flags(uint idx, uint part, bool all_parts) const; @@ -92,7 +92,7 @@ class ha_berkeley: public handler const char **bas_ext() const; ulong table_flags(void) const { return int_table_flags; } uint max_supported_keys() const { return MAX_KEY-1; } - uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } + uint extra_rec_buf_length() const { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } ha_rows estimate_rows_upper_bound(); uint max_supported_key_length() const { return UINT_MAX32; } uint max_supported_key_part_length() const { return UINT_MAX32; } @@ -106,7 +106,7 @@ class ha_berkeley: public handler int write_row(byte * buf); int update_row(const byte * old_data, byte * new_data); int delete_row(const byte * buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -154,17 +154,25 @@ class ha_berkeley: public handler uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); }; +extern const u_int32_t bdb_DB_TXN_NOSYNC; +extern const u_int32_t bdb_DB_RECOVER; +extern const u_int32_t bdb_DB_PRIVATE; +extern const u_int32_t bdb_DB_DIRECT_DB; +extern const u_int32_t bdb_DB_DIRECT_LOG; extern bool berkeley_shared_data; extern u_int32_t berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, berkeley_lock_types[]; -extern ulong berkeley_cache_size, berkeley_max_lock, berkeley_log_buffer_size; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; extern long berkeley_lock_scan_time; extern TYPELIB berkeley_lock_typelib; bool berkeley_init(void); -bool berkeley_end(void); +int berkeley_end(ha_panic_function type); bool berkeley_flush_logs(void); -int berkeley_show_logs(Protocol *protocol); +bool berkeley_show_status(THD *thd, stat_print_fn *print, enum ha_stat_type); diff --git a/sql/ha_blackhole.cc b/sql/ha_blackhole.cc index 2505919af39..7d28344a0a4 100644 --- a/sql/ha_blackhole.cc +++ b/sql/ha_blackhole.cc @@ -20,13 +20,17 @@ #endif #include "mysql_priv.h" -#ifdef HAVE_BLACKHOLE_DB #include "ha_blackhole.h" +/* Static declarations for handlerton */ + +static handler *blackhole_create_handler(TABLE_SHARE *table); + /* Blackhole storage engine handlerton */ handlerton blackhole_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "BLACKHOLE", SHOW_OPTION_YES, "/dev/null storage engine (anything you write to it disappears)", @@ -47,14 +51,33 @@ handlerton blackhole_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE + blackhole_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill FILES table */ + HTON_CAN_RECREATE, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; + +static handler *blackhole_create_handler(TABLE_SHARE *table) +{ + return 
new ha_blackhole(table); +} + + /***************************************************************************** ** BLACKHOLE tables *****************************************************************************/ -ha_blackhole::ha_blackhole(TABLE *table_arg) +ha_blackhole::ha_blackhole(TABLE_SHARE *table_arg) :handler(&blackhole_hton, table_arg) {} @@ -93,13 +116,12 @@ int ha_blackhole::create(const char *name, TABLE *table_arg, const char *ha_blackhole::index_type(uint key_number) { DBUG_ENTER("ha_blackhole::index_type"); - DBUG_RETURN((table->key_info[key_number].flags & HA_FULLTEXT) ? + DBUG_RETURN((table_share->key_info[key_number].flags & HA_FULLTEXT) ? "FULLTEXT" : - (table->key_info[key_number].flags & HA_SPATIAL) ? + (table_share->key_info[key_number].flags & HA_SPATIAL) ? "SPATIAL" : - (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? - "RTREE" : - "BTREE"); + (table_share->key_info[key_number].algorithm == + HA_KEY_ALG_RTREE) ? "RTREE" : "BTREE"); } int ha_blackhole::write_row(byte * buf) @@ -227,4 +249,3 @@ int ha_blackhole::index_last(byte * buf) DBUG_RETURN(HA_ERR_END_OF_FILE); } -#endif /* HAVE_BLACKHOLE_DB */ diff --git a/sql/ha_blackhole.h b/sql/ha_blackhole.h index 7238147a06a..15e12659aa0 100644 --- a/sql/ha_blackhole.h +++ b/sql/ha_blackhole.h @@ -28,7 +28,7 @@ class ha_blackhole: public handler THR_LOCK thr_lock; public: - ha_blackhole(TABLE *table_arg); + ha_blackhole(TABLE_SHARE *table_arg); ~ha_blackhole() { } @@ -49,7 +49,7 @@ public: } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER | HA_KEYREAD_ONLY); } @@ -84,4 +84,5 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); + bool has_transactions() { return 1; } }; diff --git a/sql/ha_federated.cc b/sql/ha_federated.cc index af7c987e477..976f3739386 100644 --- a/sql/ha_federated.cc +++ b/sql/ha_federated.cc @@ -351,21 +351,23 @@ #pragma implementation // gcc: Class implementation #endif -#ifdef HAVE_FEDERATED_DB #include "ha_federated.h" #include "m_string.h" /* Variables for federated share methods */ -static HASH federated_open_tables; // Hash used to track open - // tables -pthread_mutex_t federated_mutex; // This is the mutex we use to - // init the hash -static int federated_init= FALSE; // Variable for checking the - // init state of hash +static HASH federated_open_tables; // To track open tables +pthread_mutex_t federated_mutex; // To init the hash +static int federated_init= FALSE; // Checking the state of hash + +/* Static declaration for handerton */ +static handler *federated_create_handler(TABLE_SHARE *table); +static int federated_commit(THD *thd, bool all); +static int federated_rollback(THD *thd, bool all); /* Federated storage engine handlerton */ handlerton federated_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "FEDERATED", SHOW_OPTION_YES, "Federated MySQL storage engine", @@ -377,8 +379,8 @@ handlerton federated_hton= { NULL, /* savepoint */ NULL, /* rollback to savepoint */ NULL, /* release savepoint */ - NULL, /* commit */ - NULL, /* rollback */ + federated_commit, /* commit */ + federated_rollback, /* rollback */ NULL, /* prepare */ NULL, /* recover */ NULL, /* commit_by_xid */ @@ -386,11 +388,29 @@ handlerton federated_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* 
close_cursor_read_view */ - HTON_ALTER_NOT_SUPPORTED + federated_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + federated_db_end, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill FILES table */ + HTON_ALTER_NOT_SUPPORTED, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; -/* Function we use in the creation of our hash to get key. */ +static handler *federated_create_handler(TABLE_SHARE *table) +{ + return new ha_federated(table); +} + + +/* Function we use in the creation of our hash to get key */ static byte *federated_get_key(FEDERATED_SHARE *share, uint *length, my_bool not_used __attribute__ ((unused))) @@ -416,16 +436,14 @@ bool federated_db_init() DBUG_ENTER("federated_db_init"); if (pthread_mutex_init(&federated_mutex, MY_MUTEX_INIT_FAST)) goto error; - if (hash_init(&federated_open_tables, system_charset_info, 32, 0, 0, + if (!hash_init(&federated_open_tables, system_charset_info, 32, 0, 0, (hash_get_key) federated_get_key, 0, 0)) { - VOID(pthread_mutex_destroy(&federated_mutex)); - } - else - { federated_init= TRUE; DBUG_RETURN(FALSE); } + + VOID(pthread_mutex_destroy(&federated_mutex)); error: have_federated_db= SHOW_OPTION_DISABLED; // If we couldn't use handler DBUG_RETURN(TRUE); @@ -437,13 +455,12 @@ error: SYNOPSIS federated_db_end() - void RETURN FALSE OK */ -bool federated_db_end() +int federated_db_end(ha_panic_function type) { if (federated_init) { @@ -451,9 +468,10 @@ bool federated_db_end() VOID(pthread_mutex_destroy(&federated_mutex)); } federated_init= 0; - return FALSE; + return 0; } + /* Check (in create) whether the tables exists, and that it can be connected to @@ -587,12 +605,12 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) SYNOPSIS parse_url() - share pointer to FEDERATED share - table pointer to current TABLE class - table_create_flag determines what error to throw + share pointer to FEDERATED share + table pointer to current TABLE class + table_create_flag determines what error to throw DESCRIPTION - populates the share with information about the connection + Populates the share with information about the connection to the foreign database that will serve as the data source. This string must be specified (currently) in the "comment" field, listed in the CREATE TABLE statement. @@ -611,7 +629,7 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num) ***IMPORTANT*** Currently, only "mysql://" is supported. - 'password' and 'port' are both optional. + 'password' and 'port' are both optional. 
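Example (editor's illustration; every value here is made up):
mysql://username:password@hostname:3306/database/tablename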
RETURN VALUE 0 success @@ -629,10 +647,10 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, share->port= 0; share->socket= 0; - DBUG_PRINT("info", ("Length %d \n", table->s->connect_string.length)); - DBUG_PRINT("info", ("String %.*s \n", table->s->connect_string.length, + DBUG_PRINT("info", ("Length: %d", table->s->connect_string.length)); + DBUG_PRINT("info", ("String: '%.*s'", table->s->connect_string.length, table->s->connect_string.str)); - share->scheme= my_strdup_with_length((const byte*)table->s-> + share->scheme= my_strndup((const byte*)table->s-> connect_string.str, table->s->connect_string.length, MYF(0)); @@ -721,8 +739,8 @@ static int parse_url(FEDERATED_SHARE *share, TABLE *table, } DBUG_PRINT("info", - ("scheme %s username %s password %s \ - hostname %s port %d database %s tablename %s\n", + ("scheme: %s username: %s password: %s \ + hostname: %s port: %d database: %s tablename: %s", share->scheme, share->username, share->password, share->hostname, share->port, share->database, share->table_name)); @@ -738,11 +756,13 @@ error: ** FEDERATED tables *****************************************************************************/ -ha_federated::ha_federated(TABLE *table_arg) +ha_federated::ha_federated(TABLE_SHARE *table_arg) :handler(&federated_hton, table_arg), mysql(0), stored_result(0), scan_flag(0), ref_length(sizeof(MYSQL_ROW_OFFSET)), current_position(0) -{} +{ + trx_next= 0; +} /* @@ -750,8 +770,8 @@ ha_federated::ha_federated(TABLE *table_arg) SYNOPSIS convert_row_to_internal_format() - record Byte pointer to record - row MySQL result set row from fetchrow() + record Byte pointer to record + row MySQL result set row from fetchrow() DESCRIPTION This method simply iterates through a row returned via fetchrow with @@ -762,7 +782,7 @@ ha_federated::ha_federated(TABLE *table_arg) RETURN VALUE 0 After fields have had field values stored from record - */ +*/ uint ha_federated::convert_row_to_internal_format(byte *record, MYSQL_ROW row) { @@ -773,24 +793,23 @@ uint ha_federated::convert_row_to_internal_format(byte *record, MYSQL_ROW row) lengths= mysql_fetch_lengths(stored_result); memset(record, 0, table->s->null_bytes); - for (field= table->field; *field; field++) + for (field= table->field; *field; field++, row++, lengths++) { /* index variable to move us through the row at the same iterative step as the field */ - int x= field - table->field; my_ptrdiff_t old_ptr; old_ptr= (my_ptrdiff_t) (record - table->record[0]); - (*field)->move_field(old_ptr); - if (!row[x]) + (*field)->move_field_offset(old_ptr); + if (!*row) (*field)->set_null(); else { (*field)->set_notnull(); - (*field)->store(row[x], lengths[x], &my_charset_bin); + (*field)->store(*row, *lengths, &my_charset_bin); } - (*field)->move_field(-old_ptr); + (*field)->move_field_offset(-old_ptr); } DBUG_RETURN(0); @@ -1195,8 +1214,8 @@ bool ha_federated::create_where_from_key(String *to, DBUG_RETURN(1); } else - /* LIKE */ { + /* LIKE */ if (emit_key_part_name(&tmp, key_part) || tmp.append(FEDERATED_LIKE) || emit_key_part_element(&tmp, key_part, needs_quotes, 1, ptr, @@ -1308,16 +1327,16 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) /* In order to use this string, we must first zero it's length, or it will contain garbage - */ + */ query.length(0); pthread_mutex_lock(&federated_mutex); - tmp_table_name= (char *)table->s->table_name; - tmp_table_name_length= (uint) strlen(tmp_table_name); + tmp_table_name= table->s->table_name.str; + tmp_table_name_length= 
table->s->table_name.length; if (!(share= (FEDERATED_SHARE *) hash_search(&federated_open_tables, (byte*) table_name, - strlen(table_name)))) + tmp_table_name_length))) { query.set_charset(system_charset_info); query.append(FEDERATED_SELECT); @@ -1328,7 +1347,7 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) query.append(FEDERATED_BTICK); query.append(FEDERATED_COMMA); } - query.length(query.length()- strlen(FEDERATED_COMMA)); + query.length(query.length()- (FEDERATED_COMMA_LEN - 1)); query.append(FEDERATED_FROM); query.append(FEDERATED_BTICK); @@ -1352,7 +1371,6 @@ static FEDERATED_SHARE *get_share(const char *table_name, TABLE *table) share->select_query= select_query; strmov(share->select_query, query.ptr()); share->use_count= 0; - share->table_name_length= strlen(share->table_name); DBUG_PRINT("info", ("share->select_query %s", share->select_query)); @@ -1472,10 +1490,11 @@ int ha_federated::open(const char *name, int mode, uint test_if_locked) } /* Since we do not support transactions at this version, we can let the client - API silently reconnect. For future versions, we will need more logic to deal - with transactions + API silently reconnect. For future versions, we will need more logic to + deal with transactions */ mysql->reconnect= 1; + DBUG_RETURN(0); } @@ -1548,6 +1567,7 @@ inline uint field_in_record_is_null(TABLE *table, DBUG_RETURN(0); } + /* write_row() inserts a row. No extra() hint is given currently if a bulk load is happeneding. buf() is a byte array of data. You can use the field @@ -1565,9 +1585,6 @@ inline uint field_in_record_is_null(TABLE *table, int ha_federated::write_row(byte *buf) { bool has_fields= FALSE; - uint all_fields_have_same_query_id= 1; - ulong current_query_id= 1; - ulong tmp_query_id= 1; char insert_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char values_buffer[FEDERATED_QUERY_BUFFER_SIZE]; char insert_field_value_buffer[STRING_BUFFER_USUAL_SIZE]; @@ -1595,14 +1612,6 @@ int ha_federated::write_row(byte *buf) table->timestamp_field->set_time(); /* - get the current query id - the fields that we add to the insert - statement to send to the foreign will not be appended unless they match - this query id - */ - current_query_id= table->in_use->query_id; - DBUG_PRINT("info", ("current query id %d", current_query_id)); - - /* start both our field and field values strings */ insert_string.append(FEDERATED_INSERT); @@ -1615,21 +1624,8 @@ int ha_federated::write_row(byte *buf) values_string.append(FEDERATED_OPENPAREN); /* - Even if one field is different, all_fields_same_query_id can't remain - 0 if it remains 0, then that means no fields were specified in the query - such as in the case of INSERT INTO table VALUES (val1, val2, valN) - - */ - for (field= table->field; *field; field++) - { - if (field > table->field && tmp_query_id != (*field)->query_id) - all_fields_have_same_query_id= 0; - - tmp_query_id= (*field)->query_id; - } - /* loop through the field pointer array, add any fields to both the values - list and the fields list that match the current query id + list and the fields list that is part of the write set You might ask "Why an index variable (has_fields) ?" 
My answer is that we need to count how many fields we actually need @@ -1637,8 +1633,7 @@ int ha_federated::write_row(byte *buf) for (field= table->field; *field; field++) { /* if there is a query id and if it's equal to the current query id */ - if (((*field)->query_id && (*field)->query_id == current_query_id) - || all_fields_have_same_query_id) + if (ha_get_bit_in_write_set((*field)->fieldnr)) { /* There are some fields. This will be used later to determine @@ -1853,15 +1848,15 @@ int ha_federated::update_row(const byte *old_data, byte *new_data) update_string.append(FEDERATED_BTICK); update_string.append(FEDERATED_SET); -/* - In this loop, we want to match column names to values being inserted - (while building INSERT statement). + /* + In this loop, we want to match column names to values being inserted + (while building INSERT statement). - Iterate through table->field (new data) and share->old_filed (old_data) - using the same index to created an SQL UPDATE statement, new data is - used to create SET field=value and old data is used to create WHERE - field=oldvalue - */ + Iterate through table->field (new data) and share->old_filed (old_data) + using the same index to created an SQL UPDATE statement, new data is + used to create SET field=value and old data is used to create WHERE + field=oldvalue + */ for (Field **field= table->field; *field; field++) { @@ -2082,7 +2077,7 @@ int ha_federated::index_read_idx(byte *buf, uint index, const byte *key, This basically says that the record in table->record[0] is legal, and that it is ok to use this record, for whatever reason, such as with a join (without it, joins will not work) - */ + */ table->status= 0; retval= rnd_next(buf); @@ -2100,11 +2095,11 @@ error: } /* Initialized at each key walk (called multiple times unlike rnd_init()) */ -int ha_federated::index_init(uint keynr) +int ha_federated::index_init(uint keynr, bool sorted) { DBUG_ENTER("ha_federated::index_init"); DBUG_PRINT("info", - ("table: '%s' key: %d", table->s->table_name, keynr)); + ("table: '%s' key: %d", table->s->table_name.str, keynr)); active_index= keynr; DBUG_RETURN(0); } @@ -2285,6 +2280,7 @@ int ha_federated::rnd_end() DBUG_RETURN(retval); } + int ha_federated::index_end(void) { DBUG_ENTER("ha_federated::index_end"); @@ -2292,6 +2288,7 @@ int ha_federated::index_end(void) DBUG_RETURN(0); } + /* This is called for each row of the table scan. When you run out of records you should return HA_ERR_END_OF_FILE. Fill buff up with the row information. 
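Editor's sketch (not part of the patch; FakeHandler and the helper names are invented): the hunk below gives FEDERATED rudimentary transaction support by chaining every handler touched in a multi-statement transaction through the new trx_next pointer, rooted in thd->ha_data[federated_hton.slot]; federated_commit()/federated_rollback() then walk that chain, forward COMMIT/ROLLBACK to each remote connection, and unlink the nodes. A self-contained model of just that registration and teardown logic:

  #include <cstdio>

  struct FakeHandler
  {
    const char *name;
    FakeHandler *trx_next;                  // same role as ha_federated::trx_next
    int commit() { std::printf("COMMIT sent to %s\n", name); return 0; }
  };

  /* Register a handler in the current transaction once, as external_lock() does. */
  static void trx_register(FakeHandler *&trx, FakeHandler *h)
  {
    if (!trx) { trx= h; return; }           // first handler starts the chain
    for (FakeHandler *ptr= trx; ptr; ptr= ptr->trx_next)
    {
      if (ptr == h) return;                 // already on the chain
      if (!ptr->trx_next) { ptr->trx_next= h; return; }  // append at the tail
    }
  }

  /* Walk and unlink the chain, as federated_commit(thd, all=true) does. */
  static int trx_commit_all(FakeHandler *&trx)
  {
    int return_val= 0;
    FakeHandler *old= 0;
    for (FakeHandler *ptr= trx; ptr; old= ptr, ptr= ptr->trx_next)
    {
      if (old)
        old->trx_next= 0;                   // detach the node we just left
      int error= ptr->commit();
      if (error && !return_val)
        return_val= error;                  // remember the first failure
    }
    trx= 0;                                 // models thd->ha_data[slot]= NULL
    return return_val;
  }

  int main()
  {
    FakeHandler a= {"t1", 0}, b= {"t2", 0};
    FakeHandler *trx= 0;
    trx_register(trx, &a);
    trx_register(trx, &b);
    trx_register(trx, &a);                  // duplicate registration is a no-op
    return trx_commit_all(trx);
  }

Note how the first error from any node is kept while the walk still visits every remaining connection, which is the behaviour the commit/rollback hunks below implement.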
@@ -2669,4 +2666,151 @@ bool ha_federated::get_error_message(int error, String* buf) DBUG_RETURN(FALSE); } -#endif /* HAVE_FEDERATED_DB */ +int ha_federated::external_lock(THD *thd, int lock_type) +{ + int error= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("ha_federated::external_lock"); + + if (lock_type != F_UNLCK) + { + DBUG_PRINT("info",("federated not lock F_UNLCK")); + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + DBUG_PRINT("info",("federated autocommit")); + /* + This means we are doing an autocommit + */ + error= connection_autocommit(TRUE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit TRUE: %d", error)); + DBUG_RETURN(error); + } + trans_register_ha(thd, FALSE, &federated_hton); + } + else + { + DBUG_PRINT("info",("not autocommit")); + if (!trx) + { + /* + This is where a transaction gets its start + */ + error= connection_autocommit(FALSE); + if (error) + { + DBUG_PRINT("info", ("error setting autocommit FALSE: %d", error)); + DBUG_RETURN(error); + } + thd->ha_data[federated_hton.slot]= this; + trans_register_ha(thd, TRUE, &federated_hton); + /* + Send a lock table to the remote end. + We do not support this at the moment + */ + if (thd->options & (OPTION_TABLE_LOCK)) + { + DBUG_PRINT("info", ("We do not support lock table yet")); + } + } + else + { + ha_federated *ptr; + for (ptr= trx; ptr; ptr= ptr->trx_next) + if (ptr == this) + break; + else if (!ptr->trx_next) + ptr->trx_next= this; + } + } + } + DBUG_RETURN(0); +} + + +static int federated_commit(THD *thd, bool all) +{ + int return_val= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("federated_commit"); + + if (all) + { + int error= 0; + ha_federated *ptr, *old= NULL; + for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next) + { + if (old) + old->trx_next= NULL; + error= ptr->connection_commit(); + if (error && !return_val) + return_val= error; + } + thd->ha_data[federated_hton.slot]= NULL; + } + + DBUG_PRINT("info", ("error val: %d", return_val)); + DBUG_RETURN(return_val); +} + + +static int federated_rollback(THD *thd, bool all) +{ + int return_val= 0; + ha_federated *trx= (ha_federated *)thd->ha_data[federated_hton.slot]; + DBUG_ENTER("federated_rollback"); + + if (all) + { + int error= 0; + ha_federated *ptr, *old= NULL; + for (ptr= trx; ptr; old= ptr, ptr= ptr->trx_next) + { + if (old) + old->trx_next= NULL; + error= ptr->connection_rollback(); + if (error && !return_val) + return_val= error; + } + thd->ha_data[federated_hton.slot]= NULL; + } + + DBUG_PRINT("info", ("error val: %d", return_val)); + DBUG_RETURN(return_val); +} + +int ha_federated::connection_commit() +{ + DBUG_ENTER("ha_federated::connection_commit"); + DBUG_RETURN(execute_simple_query("COMMIT", 6)); +} + + +int ha_federated::connection_rollback() +{ + DBUG_ENTER("ha_federated::connection_rollback"); + DBUG_RETURN(execute_simple_query("ROLLBACK", 8)); +} + + +int ha_federated::connection_autocommit(bool state) +{ + const char *text; + DBUG_ENTER("ha_federated::connection_autocommit"); + text= (state == true) ?
"SET AUTOCOMMIT=1" : "SET AUTOCOMMIT=0"; + DBUG_RETURN(execute_simple_query(text, 16)); +} + + +int ha_federated::execute_simple_query(const char *query, int len) +{ + DBUG_ENTER("ha_federated::execute_simple_query"); + + if (mysql_real_query(mysql, query, len)) + { + DBUG_RETURN(stash_remote_error()); + } + DBUG_RETURN(0); +} + diff --git a/sql/ha_federated.h b/sql/ha_federated.h index 08203d7e51d..953f4208bc5 100644 --- a/sql/ha_federated.h +++ b/sql/ha_federated.h @@ -173,13 +173,15 @@ private: int stash_remote_error(); public: - ha_federated(TABLE *table_arg); - ~ha_federated() - { - } + ha_federated(TABLE_SHARE *table_arg); + ~ha_federated() {} /* The name that will be used for display purposes */ const char *table_type() const { return "FEDERATED"; } /* + Next pointer used in transaction + */ + ha_federated *trx_next; + /* The name of the index type that will be used for display don't implement this method unless you really have indexes */ @@ -230,8 +232,7 @@ public: */ double scan_time() { - DBUG_PRINT("info", - ("records %d", records)); + DBUG_PRINT("info", ("records %lu", (ulong) records)); return (double)(records*1000); } /* @@ -259,7 +260,7 @@ public: int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint keynr); + int index_init(uint keynr, bool sorted); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); int index_read_idx(byte *buf, uint idx, const byte *key, @@ -299,7 +300,14 @@ public: THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); //required virtual bool get_error_message(int error, String *buf); + int external_lock(THD *thd, int lock_type); + int connection_commit(); + int connection_rollback(); + bool has_transactions() { return 1; } + int connection_autocommit(bool state); + int execute_simple_query(const char *query, int len); }; bool federated_db_init(void); -bool federated_db_end(void); +int federated_db_end(ha_panic_function type); + diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 739569e305a..96f760a7a44 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -23,7 +23,11 @@ #include <myisampack.h> #include "ha_heap.h" + +static handler *heap_create_handler(TABLE_SHARE *table); + handlerton heap_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "MEMORY", SHOW_OPTION_YES, "Hash based, stored in memory, useful for temporary tables", @@ -44,14 +48,32 @@ handlerton heap_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE + heap_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + heap_panic, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill Files Table */ + HTON_CAN_RECREATE, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; +static handler *heap_create_handler(TABLE_SHARE *table) +{ + return new ha_heap(table); +} + + /***************************************************************************** ** HEAP tables *****************************************************************************/ -ha_heap::ha_heap(TABLE *table_arg) +ha_heap::ha_heap(TABLE_SHARE *table_arg) :handler(&heap_hton, table_arg), file(0), records_changed(0), key_stat_version(0) {} @@ -503,11 +525,18 @@ THR_LOCK_DATA **ha_heap::store_lock(THD *thd, int 
ha_heap::delete_table(const char *name) { char buff[FN_REFLEN]; - int error= heap_delete_table(fn_format(buff,name,"","", - MY_REPLACE_EXT|MY_UNPACK_FILENAME)); + int error= heap_delete_table(name); return error == ENOENT ? 0 : error; } + +void ha_heap::drop_table(const char *name) +{ + heap_drop_table(file); + close(); +} + + int ha_heap::rename_table(const char * from, const char * to) { return heap_rename(from,to); @@ -545,7 +574,6 @@ int ha_heap::create(const char *name, TABLE *table_arg, ha_rows max_rows; HP_KEYDEF *keydef; HA_KEYSEG *seg; - char buff[FN_REFLEN]; int error; TABLE_SHARE *share= table_arg->s; bool found_real_auto_increment= 0; @@ -626,7 +654,7 @@ int ha_heap::create(const char *name, TABLE *table_arg, } } mem_per_row+= MY_ALIGN(share->reclength + 1, sizeof(char*)); - max_rows = (ha_rows) (table->in_use->variables.max_heap_table_size / + max_rows = (ha_rows) (table_arg->in_use->variables.max_heap_table_size / mem_per_row); if (table_arg->found_next_number_field) { @@ -641,8 +669,7 @@ int ha_heap::create(const char *name, TABLE *table_arg, hp_create_info.max_table_size=current_thd->variables.max_heap_table_size; hp_create_info.with_auto_increment= found_real_auto_increment; max_rows = (ha_rows) (hp_create_info.max_table_size / mem_per_row); - error= heap_create(fn_format(buff,name,"","", - MY_REPLACE_EXT|MY_UNPACK_FILENAME), + error= heap_create(name, keys, keydef, share->reclength, (ulong) ((share->max_rows < max_rows && share->max_rows) ? @@ -667,3 +694,15 @@ ulonglong ha_heap::get_auto_increment() ha_heap::info(HA_STATUS_AUTO); return auto_increment_value; } + + +bool ha_heap::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* Check that auto_increment value was not changed */ + if ((table_changes != IS_EQUAL_YES && + info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_heap.h b/sql/ha_heap.h index e2816abf0b6..9b9b7f90d90 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -31,7 +31,7 @@ class ha_heap: public handler uint records_changed; uint key_stat_version; public: - ha_heap(TABLE *table); + ha_heap(TABLE_SHARE *table); ~ha_heap() {} const char *table_type() const { @@ -40,8 +40,8 @@ public: } const char *index_type(uint inx) { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? "BTREE" : - "HASH"); + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + "BTREE" : "HASH"); } /* Rows also use a fixed-size format */ enum row_type get_row_type() const { return ROW_TYPE_FIXED; } @@ -54,7 +54,7 @@ public: } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_BTREE) ? HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE : HA_ONLY_WHOLE_INDEX); } @@ -94,6 +94,7 @@ public: int indexes_are_disabled(void); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); int delete_table(const char *from); + void drop_table(const char *name); int rename_table(const char * from, const char * to); int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info); void update_create_info(HA_CREATE_INFO *create_info); @@ -106,6 +107,7 @@ public: HEAP_PTR ptr2=*(HEAP_PTR*)ref2; return ptr1 < ptr2? -1 : (ptr1 > ptr2? 
1 : 0); } + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); private: void update_key_stats(); }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 1b1326920ad..153c456c06c 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -34,7 +34,6 @@ have disables the InnoDB inlining in this file. */ #include "mysql_priv.h" #include "slave.h" -#ifdef HAVE_INNOBASE_DB #include <m_ctype.h> #include <hash.h> #include <myisampack.h> @@ -111,33 +110,35 @@ typedef byte mysql_byte; /* Include necessary InnoDB headers */ extern "C" { -#include "../innobase/include/univ.i" -#include "../innobase/include/os0file.h" -#include "../innobase/include/os0thread.h" -#include "../innobase/include/srv0start.h" -#include "../innobase/include/srv0srv.h" -#include "../innobase/include/trx0roll.h" -#include "../innobase/include/trx0trx.h" -#include "../innobase/include/trx0sys.h" -#include "../innobase/include/mtr0mtr.h" -#include "../innobase/include/row0ins.h" -#include "../innobase/include/row0mysql.h" -#include "../innobase/include/row0sel.h" -#include "../innobase/include/row0upd.h" -#include "../innobase/include/log0log.h" -#include "../innobase/include/lock0lock.h" -#include "../innobase/include/dict0crea.h" -#include "../innobase/include/btr0cur.h" -#include "../innobase/include/btr0btr.h" -#include "../innobase/include/fsp0fsp.h" -#include "../innobase/include/sync0sync.h" -#include "../innobase/include/fil0fil.h" -#include "../innobase/include/trx0xa.h" +#include "../storage/innobase/include/univ.i" +#include "../storage/innobase/include/os0file.h" +#include "../storage/innobase/include/os0thread.h" +#include "../storage/innobase/include/srv0start.h" +#include "../storage/innobase/include/srv0srv.h" +#include "../storage/innobase/include/trx0roll.h" +#include "../storage/innobase/include/trx0trx.h" +#include "../storage/innobase/include/trx0sys.h" +#include "../storage/innobase/include/mtr0mtr.h" +#include "../storage/innobase/include/row0ins.h" +#include "../storage/innobase/include/row0mysql.h" +#include "../storage/innobase/include/row0sel.h" +#include "../storage/innobase/include/row0upd.h" +#include "../storage/innobase/include/log0log.h" +#include "../storage/innobase/include/lock0lock.h" +#include "../storage/innobase/include/dict0crea.h" +#include "../storage/innobase/include/btr0cur.h" +#include "../storage/innobase/include/btr0btr.h" +#include "../storage/innobase/include/fsp0fsp.h" +#include "../storage/innobase/include/sync0sync.h" +#include "../storage/innobase/include/fil0fil.h" +#include "../storage/innobase/include/trx0xa.h" } #define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */ #define HA_INNOBASE_RANGE_COUNT 100 +uint innobase_init_flags = 0; +ulong innobase_cache_size = 0; ulong innobase_large_page_size = 0; /* The default values for the following, type long or longlong, start-up @@ -165,6 +166,7 @@ char* innobase_unix_file_flush_method = NULL; /* Below we have boolean-valued start-up parameters, and their default values */ +uint innobase_flush_log_at_trx_commit = 1; ulong innobase_fast_shutdown = 1; my_bool innobase_log_archive = FALSE;/* unused */ my_bool innobase_use_doublewrite = TRUE; @@ -185,6 +187,8 @@ it every INNOBASE_WAKE_INTERVAL'th step. 
*/ #define INNOBASE_WAKE_INTERVAL 32 ulong innobase_active_counter = 0; +char* innobase_home = NULL; + static HASH innobase_open_tables; #ifdef __NETWARE__ /* some special cleanup for NetWare */ @@ -201,8 +205,10 @@ static int innobase_rollback(THD* thd, bool all); static int innobase_rollback_to_savepoint(THD* thd, void *savepoint); static int innobase_savepoint(THD* thd, void *savepoint); static int innobase_release_savepoint(THD* thd, void *savepoint); +static handler *innobase_create_handler(TABLE_SHARE *table); handlerton innobase_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "InnoDB", SHOW_OPTION_YES, "Supports transactions, row-level locking, and foreign keys", @@ -223,9 +229,28 @@ handlerton innobase_hton = { innobase_create_cursor_view, innobase_set_cursor_view, innobase_close_cursor_view, - HTON_NO_FLAGS + innobase_create_handler, /* Create a new handler */ + innobase_drop_database, /* Drop a database */ + innobase_end, /* Panic call */ + innobase_start_trx_and_assign_read_view, /* Start Consistent Snapshot */ + innobase_flush_logs, /* Flush logs */ + innobase_show_status, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* alter_tablespace */ + NULL, /* Fill FILES table */ + HTON_NO_FLAGS, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; + +static handler *innobase_create_handler(TABLE_SHARE *table) +{ + return new ha_innobase(table); +} + + /********************************************************************* Commits a transaction in an InnoDB database. */ @@ -234,7 +259,7 @@ innobase_commit_low( /*================*/ trx_t* trx); /* in: transaction handle */ -struct show_var_st innodb_status_variables[]= { +SHOW_VAR innodb_status_variables[]= { {"buffer_pool_pages_data", (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG}, {"buffer_pool_pages_dirty", @@ -334,7 +359,7 @@ innodb_srv_conc_enter_innodb( /*=========================*/ trx_t* trx) /* in: transaction handle */ { - if (UNIV_LIKELY(!srv_thread_concurrency)) { + if (UNIV_LIKELY(srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD)) { return; } @@ -351,7 +376,7 @@ innodb_srv_conc_exit_innodb( /*========================*/ trx_t* trx) /* in: transaction handle */ { - if (UNIV_LIKELY(!srv_thread_concurrency)) { + if (UNIV_LIKELY(srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD)) { return; } @@ -386,7 +411,7 @@ Call this function when mysqld passes control to the client. That is to avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more documentation, see handler.cc. */ -void +int innobase_release_temporary_latches( /*===============================*/ THD *thd) @@ -395,7 +420,7 @@ innobase_release_temporary_latches( if (!innodb_inited) { - return; + return 0; } trx = (trx_t*) thd->ha_data[innobase_hton.slot]; @@ -403,6 +428,7 @@ innobase_release_temporary_latches( if (trx) { innobase_release_stat_resources(trx); } + return 0; } /************************************************************************ @@ -442,6 +468,10 @@ convert_error_code_to_mysql( return(HA_ERR_FOUND_DUPP_KEY); + } else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) { + + return(HA_ERR_FOREIGN_DUPLICATE_KEY); + } else if (error == (int) DB_RECORD_NOT_FOUND) { return(HA_ERR_NO_ACTIVE_RECORD); @@ -800,16 +830,18 @@ check_trx_exists( /************************************************************************* Construct ha_innobase handler. 
*/ -ha_innobase::ha_innobase(TABLE *table_arg) +ha_innobase::ha_innobase(TABLE_SHARE *table_arg) :handler(&innobase_hton, table_arg), int_table_flags(HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_CAN_SQL_HANDLER | HA_NOT_EXACT_COUNT | + HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS | HA_PRIMARY_KEY_IN_READ_INDEX | HA_CAN_GEOMETRY | HA_TABLE_SCAN_ON_INDEX), + last_dup_key((uint) -1), start_of_scan(0), num_write_row(0) {} @@ -978,11 +1010,6 @@ innobase_query_caching_of_table_permitted( sql_print_error("The calling thread is holding the adaptive " "search, latch though calling " "innobase_query_caching_of_table_permitted."); - - mutex_enter_noninline(&kernel_mutex); - trx_print(stderr, trx, 1024); - mutex_exit_noninline(&kernel_mutex); - ut_error; } innobase_release_stat_resources(trx); @@ -1340,6 +1367,7 @@ innobase_init(void) srv_log_archive_on = (ulint) innobase_log_archive; #endif /* UNIV_LOG_ARCHIVE */ srv_log_buffer_size = (ulint) innobase_log_buffer_size; + srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit; /* We set srv_pool_size here in units of 1 kB. InnoDB internally changes the value so that it becomes the number of database pages. */ @@ -1389,7 +1417,6 @@ innobase_init(void) ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL == my_charset_latin1.number); - ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number); /* Store the latin1_swedish_ci character ordering table to InnoDB. For non-latin1_swedish_ci charsets we use the MySQL comparison functions, @@ -1449,8 +1476,8 @@ error: /*********************************************************************** Closes an InnoDB database. */ -bool -innobase_end(void) +int +innobase_end(ha_panic_function type) /*==============*/ /* out: TRUE if error */ { @@ -1814,7 +1841,7 @@ innobase_commit_complete( trx->active_trans = 0; - if (UNIV_UNLIKELY(srv_flush_log_at_trx_commit == 0)) { + if (srv_flush_log_at_trx_commit == 0) { return(0); } @@ -1955,6 +1982,11 @@ innobase_repl_report_sent_binlog( int cmp; ibool can_release_threads = 0; + if (!innodb_inited) { + + return 0; + } + /* If synchronous replication is not switched on, or this thd is sending binlog to a slave where we do not need synchronous replication, then return immediately */ @@ -2846,6 +2878,8 @@ ha_innobase::store_key_val_for_row( char* buff_start = buff; enum_field_types mysql_type; Field* field; + ulint blob_len; + byte* blob_data; ibool is_null; DBUG_ENTER("store_key_val_for_row"); @@ -2900,18 +2934,14 @@ ha_innobase::store_key_val_for_row( ulint len; byte* data; ulint key_len; - ulint true_len; CHARSET_INFO* cs; int error=0; - key_len = key_part->length; - if (is_null) { - buff += key_len + 2; + buff += key_part->length + 2; continue; } - cs = field->charset(); lenlen = (ulint) (((Field_varstring*)field)->length_bytes); @@ -2921,33 +2951,32 @@ ha_innobase::store_key_val_for_row( + (ulint)get_field_offset(table, field)), lenlen); - true_len = len; - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) data, - (const char *) data + len, - key_len / cs->mbmaxlen, - &error); - } - /* In a column prefix index, we may need to truncate the stored value: */ + + cs = key_part->field->charset(); + + if (cs->mbmaxlen > 1 && key_part->length > 0) { + key_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) data, + (const char *) data + key_part->length, + key_part->length / cs->mbmaxlen, + &error); + } else { + key_len 
= key_part->length; + } - if (true_len > key_len) { - true_len = key_len; + if (len > key_len) { + len = key_len; } /* The length in a key value is always stored in 2 bytes */ - row_mysql_store_true_var_len((byte*)buff, true_len, 2); + row_mysql_store_true_var_len((byte*)buff, len, 2); buff += 2; - memcpy(buff, data, true_len); + memcpy(buff, data, len); /* Note that we always reserve the maximum possible length of the true VARCHAR in the key value, though @@ -2955,7 +2984,7 @@ ha_innobase::store_key_val_for_row( actual data. The rest of the space was reset to zero in the bzero() call above. */ - buff += key_len; + buff += key_part->length; } else if (mysql_type == FIELD_TYPE_TINY_BLOB || mysql_type == FIELD_TYPE_MEDIUM_BLOB @@ -2965,66 +2994,58 @@ ha_innobase::store_key_val_for_row( CHARSET_INFO* cs; ulint key_len; ulint len; - ulint true_len; int error=0; - ulint blob_len; - byte* blob_data; ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); - key_len = key_part->length; - if (is_null) { - buff += key_len + 2; + buff += key_part->length + 2; continue; } - cs = field->charset(); - blob_data = row_mysql_read_blob_ref(&blob_len, (byte*) (record + (ulint)get_field_offset(table, field)), (ulint) field->pack_length()); - true_len = blob_len; - ut_a(get_field_offset(table, field) == key_part->offset); - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (blob_len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) blob_data, - (const char *) blob_data - + blob_len, - key_len / cs->mbmaxlen, - &error); - } - /* All indexes on BLOB and TEXT are column prefix indexes, and we may need to truncate the data to be stored in the key value: */ - if (true_len > key_len) { - true_len = key_len; + cs = key_part->field->charset(); + + if (cs->mbmaxlen > 1 && key_part->length > 0) { + key_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) blob_data, + (const char *) blob_data + + key_part->length, + key_part->length / cs->mbmaxlen, + &error); + } else { + key_len = key_part->length; + } + + if (blob_len > key_len) { + blob_len = key_len; } /* MySQL reserves 2 bytes for the length and the storage of the number is little-endian */ innobase_write_to_2_little_endian( - (byte*)buff, true_len); + (byte*)buff, (ulint)blob_len); buff += 2; - memcpy(buff, blob_data, true_len); + memcpy(buff, blob_data, blob_len); /* Note that we always reserve the maximum possible length of the BLOB prefix in the key value. */ - buff += key_len; + buff += key_part->length; } else { /* Here we handle all other data types except the true VARCHAR, BLOB and TEXT. Note that the column @@ -3032,53 +3053,34 @@ ha_innobase::store_key_val_for_row( index. */ CHARSET_INFO* cs; - ulint true_len; - ulint key_len; + ulint len; const mysql_byte* src_start; int error=0; - enum_field_types real_type; - - key_len = key_part->length; if (is_null) { - buff += key_len; + buff += key_part->length; continue; } - src_start = record + key_part->offset; - real_type = field->real_type(); - true_len = key_len; - - /* Character set for the field is defined only - to fields whose type is string and real field - type is not enum or set. For these fields check - if character set is multi byte. 
*/ - - if (real_type != FIELD_TYPE_ENUM - && real_type != FIELD_TYPE_SET - && ( mysql_type == MYSQL_TYPE_VAR_STRING - || mysql_type == MYSQL_TYPE_STRING)) { - - cs = field->charset(); - - /* For multi byte character sets we need to - calculate the true length of the key */ + /* In a column prefix index, we may need to truncate + the stored value: */ - if (key_len > 0 && cs->mbmaxlen > 1) { + cs = key_part->field->charset(); + src_start = record + key_part->offset; - true_len = (ulint) - cs->cset->well_formed_len(cs, - (const char *)src_start, - (const char *)src_start - + key_len, - key_len / cs->mbmaxlen, - &error); - } + if (key_part->length > 0 && cs->mbmaxlen > 1) { + len = (ulint) cs->cset->well_formed_len(cs, + (const char *) src_start, + (const char *) src_start + key_part->length, + key_part->length / cs->mbmaxlen, + &error); + } else { + len = key_part->length; } - memcpy(buff, src_start, true_len); - buff += true_len; + memcpy(buff, src_start, len); + buff+=len; /* Pad the unused space with spaces. Note that no padding is ever needed for UCS-2 because in MySQL, @@ -3086,10 +3088,10 @@ ha_innobase::store_key_val_for_row( support surrogate pairs, which are needed to represent characters in the range U+10000 to U+10FFFF. */ - if (true_len < key_len) { - ulint pad_len = key_len - true_len; - memset(buff, ' ', pad_len); - buff += pad_len; + if (len < key_part->length) { + len = key_part->length - len; + memset(buff, ' ', len); + buff+=len; } } } @@ -3219,7 +3221,8 @@ build_template( goto include_field; } - if (thd->query_id == field->query_id) { + if (table->file->ha_get_bit_in_read_set(i+1) || + table->file->ha_get_bit_in_write_set(i+1)) { /* This field is needed in the query */ goto include_field; @@ -3806,8 +3809,9 @@ ha_innobase::delete_row( } /************************************************************************** -Removes a new lock set on a row. This method does nothing unless the -option innodb_locks_unsafe_for_binlog is set.*/ +Removes a new lock set on a row, if it was not read optimistically. This can +be called after a row has been read in the processing of an UPDATE or a DELETE +query, if the option innodb_locks_unsafe_for_binlog is set. */ void ha_innobase::unlock_row(void) @@ -3817,7 +3821,7 @@ ha_innobase::unlock_row(void) DBUG_ENTER("ha_innobase::unlock_row"); - if (last_query_id != user_thd->query_id) { + if (UNIV_UNLIKELY(last_query_id != user_thd->query_id)) { ut_print_timestamp(stderr); sql_print_error("last_query_id is %lu != user_thd_query_id is " "%lu", (ulong) last_query_id, @@ -3825,13 +3829,46 @@ ha_innobase::unlock_row(void) mem_analyze_corruption((byte *) prebuilt->trx); ut_error; } - - if (srv_locks_unsafe_for_binlog) { + + switch (prebuilt->row_read_type) { + case ROW_READ_WITH_LOCKS: + if (!srv_locks_unsafe_for_binlog) { + break; + } + /* fall through */ + case ROW_READ_TRY_SEMI_CONSISTENT: row_unlock_for_mysql(prebuilt, FALSE); + break; + case ROW_READ_DID_SEMI_CONSISTENT: + prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + break; } DBUG_VOID_RETURN; +} + +/* See handler.h and row0mysql.h for docs on this function. */ +bool +ha_innobase::was_semi_consistent_read(void) +/*=======================================*/ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); +} + +/* See handler.h and row0mysql.h for docs on this function. 
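(Editor's gloss: a "semi-consistent" read lets an UPDATE or DELETE scan use
the latest committed version of a row that another transaction holds locked,
so rows that then fail the WHERE clause are skipped without ever waiting for
or taking the row lock; was_semi_consistent_read() above tells the SQL layer
that a row read this way must be re-read with locking if it did match.)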
*/ +void +ha_innobase::try_semi_consistent_read(bool yes) +/*===========================================*/ +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + + if (yes && srv_locks_unsafe_for_binlog) { + prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; + } else { + prebuilt->row_read_type = ROW_READ_WITH_LOCKS; + } } /********************************************************************** @@ -3841,7 +3878,8 @@ int ha_innobase::index_init( /*====================*/ /* out: 0 or error number */ - uint keynr) /* in: key (index) number */ + uint keynr, /* in: key (index) number */ + bool sorted) /* in: 1 if result MUST be sorted according to index */ { int error = 0; DBUG_ENTER("index_init"); @@ -4344,6 +4382,13 @@ ha_innobase::rnd_init( err = change_active_index(primary_key); } + /* Don't use semi-consistent read in random row reads (by position). + This means we must disable semi_consistent_read if scan is false */ + + if (!scan) { + try_semi_consistent_read(0); + } + start_of_scan = 1; return(err); @@ -4822,7 +4867,7 @@ ha_innobase::create( srv_lower_case_table_names = FALSE; } - fn_format(name2, name, "", "", 2); // Remove the .frm extension + strcpy(name2, name); normalize_table_name(norm_name, name2); @@ -4844,8 +4889,8 @@ ha_innobase::create( /* Look for a primary key */ - primary_key_no= (table->s->primary_key != MAX_KEY ? - (int) table->s->primary_key : + primary_key_no= (form->s->primary_key != MAX_KEY ? + (int) form->s->primary_key : -1); /* Our function row_get_mysql_key_number_for_index assumes @@ -5118,7 +5163,7 @@ ha_innobase::delete_table( /********************************************************************* Removes all tables in the named database inside InnoDB. */ -int +void innobase_drop_database( /*===================*/ /* out: error number */ @@ -5184,10 +5229,13 @@ innobase_drop_database( innobase_commit_low(trx); trx_free_for_mysql(trx); - +#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR error = convert_error_code_to_mysql(error, NULL); return(error); +#else + return; +#endif } /************************************************************************* @@ -5716,7 +5764,7 @@ ha_innobase::analyze( } /************************************************************************** -This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds +This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds the table in MySQL. */ int @@ -5780,7 +5828,6 @@ ha_innobase::update_table_comment( uint length = (uint) strlen(comment); char* str; row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; - long flen; /* We do not know if MySQL can call this function before calling external_lock(). 
To be safe, update the thd of the current table @@ -5800,42 +5847,42 @@ ha_innobase::update_table_comment( trx_search_latch_release_if_reserved(prebuilt->trx); str = NULL; - /* output the data to a temporary file */ + if (FILE* file = os_file_create_tmpfile()) { + long flen; - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); - - fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB", + /* output the data to a temporary file */ + fprintf(file, "InnoDB free: %lu kB", (ulong) fsp_get_available_space_in_free_extents( prebuilt->table->space)); - dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, + dict_print_info_on_foreign_keys(FALSE, file, prebuilt->trx, prebuilt->table); - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (length + flen + 3 > 64000) { - flen = 64000 - 3 - length; - } + flen = ftell(file); + if (flen < 0) { + flen = 0; + } else if (length + flen + 3 > 64000) { + flen = 64000 - 3 - length; + } - /* allocate buffer for the full string, and - read the contents of the temporary file */ + /* allocate buffer for the full string, and + read the contents of the temporary file */ - str = my_malloc(length + flen + 3, MYF(0)); + str = my_malloc(length + flen + 3, MYF(0)); - if (str) { - char* pos = str + length; - if (length) { - memcpy(str, comment, length); - *pos++ = ';'; - *pos++ = ' '; + if (str) { + char* pos = str + length; + if (length) { + memcpy(str, comment, length); + *pos++ = ';'; + *pos++ = ' '; + } + rewind(file); + flen = (uint) fread(pos, 1, flen, file); + pos[flen] = 0; } - rewind(srv_dict_tmpfile); - flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile); - pos[flen] = 0; - } - mutex_exit_noninline(&srv_dict_tmpfile_mutex); + fclose(file); + } prebuilt->trx->op_info = (char*)""; @@ -5854,7 +5901,6 @@ ha_innobase::get_foreign_key_create_info(void) { row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt; char* str = 0; - long flen; ut_a(prebuilt != NULL); @@ -5864,41 +5910,46 @@ ha_innobase::get_foreign_key_create_info(void) update_thd(current_thd); - prebuilt->trx->op_info = (char*)"getting info on foreign keys"; + if (FILE* file = os_file_create_tmpfile()) { + long flen; - /* In case MySQL calls this in the middle of a SELECT query, - release possible adaptive hash latch to avoid - deadlocks of threads */ + prebuilt->trx->op_info = (char*)"getting info on foreign keys"; - trx_search_latch_release_if_reserved(prebuilt->trx); + /* In case MySQL calls this in the middle of a SELECT query, + release possible adaptive hash latch to avoid + deadlocks of threads */ - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); + trx_search_latch_release_if_reserved(prebuilt->trx); - /* output the data to a temporary file */ - dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile, + /* output the data to a temporary file */ + dict_print_info_on_foreign_keys(TRUE, file, prebuilt->trx, prebuilt->table); - prebuilt->trx->op_info = (char*)""; + prebuilt->trx->op_info = (char*)""; - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (flen > 64000 - 1) { - flen = 64000 - 1; - } + flen = ftell(file); + if (flen < 0) { + flen = 0; + } else if (flen > 64000 - 1) { + flen = 64000 - 1; + } - /* allocate buffer for the string, and - read the contents of the temporary file */ + /* allocate buffer for the string, and + read the contents of the temporary file */ - str = my_malloc(flen + 1, MYF(0)); + str = my_malloc(flen + 1, MYF(0)); - if (str) { - rewind(srv_dict_tmpfile); - flen = (uint) 
fread(str, 1, flen, srv_dict_tmpfile);
- str[flen] = 0;
- }
+ if (str) {
+ rewind(file);
+ flen = (uint) fread(str, 1, flen, file);
+ str[flen] = 0;
+ }
- mutex_exit_noninline(&srv_dict_tmpfile_mutex);
+ fclose(file);
+ } else {
+ /* unable to create temporary file */
+ str = my_strdup(
+"/* Error: cannot display foreign key constraints */", MYF(0));
+ }
return(str);
}
@@ -6329,17 +6380,14 @@ ha_innobase::external_lock(
TABLES if AUTOCOMMIT=1. It does not make much sense to acquire an InnoDB table lock if it is released immediately at the end of LOCK TABLES, and InnoDB's table locks in that case cause
- VERY easily deadlocks.
-
- We do not set InnoDB table locks if user has not explicitly
- requested a table lock. Note that thd->in_lock_tables
- can be TRUE on some cases e.g. at the start of a stored
- procedure call (SQLCOM_CALL). */
+ VERY easily deadlocks. We do not set InnoDB table locks when
+ MySQL sets them at the start of a stored procedure call
+ (MySQL does have thd->in_lock_tables TRUE there). */
if (prebuilt->select_lock_type != LOCK_NONE) {
if (thd->in_lock_tables &&
- thd->lex->sql_command == SQLCOM_LOCK_TABLES &&
+ thd->lex->sql_command != SQLCOM_CALL &&
thd->variables.innodb_table_locks &&
(thd->options & OPTION_NOT_AUTOCOMMIT)) {
@@ -6492,11 +6540,13 @@ ha_innobase::transactional_table_lock(
/****************************************************************************
Here we export InnoDB status variables to MySQL. */
-void
-innodb_export_status(void)
+int
+innodb_export_status()
/*======================*/
{
- srv_export_innodb_status();
+ if (innodb_inited)
+ srv_export_innodb_status();
+ return 0;
}
/****************************************************************************
@@ -6506,9 +6556,9 @@ Monitor to the client. */
bool
innodb_show_status(
/*===============*/
- THD* thd) /* in: the MySQL query thread of the caller */
+ THD* thd, /* in: the MySQL query thread of the caller */
+ stat_print_fn *stat_print)
{
- Protocol* protocol = thd->protocol;
trx_t* trx;
static const char truncated_msg[] = "... truncated...\n";
const long MAX_STATUS_SIZE = 64000;
@@ -6518,10 +6568,7 @@ innodb_show_status(
DBUG_ENTER("innodb_show_status");
if (have_innodb != SHOW_OPTION_YES) {
- my_message(ER_NOT_SUPPORTED_YET,
- "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
- MYF(0));
- DBUG_RETURN(TRUE);
+ DBUG_RETURN(FALSE);
}
trx = check_trx_exists(thd);
@@ -6583,28 +6630,15 @@ innodb_show_status(
mutex_exit_noninline(&srv_monitor_file_mutex);
- List<Item> field_list;
-
- field_list.push_back(new Item_empty_string("Status", flen));
-
- if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS |
- Protocol::SEND_EOF)) {
- my_free(str, MYF(0));
-
- DBUG_RETURN(TRUE);
- }
-
- protocol->prepare_for_resend();
- protocol->store(str, flen, system_charset_info);
- my_free(str, MYF(0));
-
- if (protocol->write()) {
+ bool result = FALSE;
- DBUG_RETURN(TRUE);
+ if (stat_print(thd, innobase_hton.name, strlen(innobase_hton.name),
+ STRING_WITH_LEN(""), str, flen)) {
+ result= TRUE;
}
- send_eof(thd);
+ my_free(str, MYF(0));
- DBUG_RETURN(FALSE);
+ DBUG_RETURN(result);
}
/****************************************************************************
@@ -6613,10 +6647,10 @@ Implements the SHOW MUTEX STATUS command.
*/ bool innodb_mutex_show_status( /*===============*/ - THD* thd) /* in: the MySQL query thread of the caller */ + THD* thd, /* in: the MySQL query thread of the caller */ + stat_print_fn *stat_print) { - Protocol *protocol= thd->protocol; - List<Item> field_list; + char buf1[IO_SIZE], buf2[IO_SIZE]; mutex_t* mutex; ulint rw_lock_count= 0; ulint rw_lock_count_spin_loop= 0; @@ -6624,21 +6658,9 @@ innodb_mutex_show_status( ulint rw_lock_count_os_wait= 0; ulint rw_lock_count_os_yield= 0; ulonglong rw_lock_wait_time= 0; + uint hton_name_len= strlen(innobase_hton.name), buf1len, buf2len; DBUG_ENTER("innodb_mutex_show_status"); - field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN)); - field_list.push_back(new Item_empty_string("Module", FN_REFLEN)); - field_list.push_back(new Item_uint("Count", 21)); - field_list.push_back(new Item_uint("Spin_waits", 21)); - field_list.push_back(new Item_uint("Spin_rounds", 21)); - field_list.push_back(new Item_uint("OS_waits", 21)); - field_list.push_back(new Item_uint("OS_yields", 21)); - field_list.push_back(new Item_uint("OS_waits_time", 21)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_enter(&mutex_list_mutex); #endif @@ -6651,17 +6673,17 @@ innodb_mutex_show_status( { if (mutex->count_using > 0) { - protocol->prepare_for_resend(); - protocol->store(mutex->cmutex_name, system_charset_info); - protocol->store(mutex->cfile_name, system_charset_info); - protocol->store((ulonglong)mutex->count_using); - protocol->store((ulonglong)mutex->count_spin_loop); - protocol->store((ulonglong)mutex->count_spin_rounds); - protocol->store((ulonglong)mutex->count_os_wait); - protocol->store((ulonglong)mutex->count_os_yield); - protocol->store((ulonglong)mutex->lspent_time/1000); - - if (protocol->write()) + buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%s", + mutex->cmutex_name, mutex->cfile_name); + buf2len= my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + mutex->count_using, mutex->count_spin_loop, + mutex->count_spin_rounds, + mutex->count_os_wait, mutex->count_os_yield, + mutex->lspent_time/1000); + if (stat_print(thd, innobase_hton.name, hton_name_len, + buf1, buf1len, buf2, buf2len)) { #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); @@ -6683,17 +6705,16 @@ innodb_mutex_show_status( mutex = UT_LIST_GET_NEXT(list, mutex); } - protocol->prepare_for_resend(); - protocol->store("rw_lock_mutexes", system_charset_info); - protocol->store("", system_charset_info); - protocol->store((ulonglong)rw_lock_count); - protocol->store((ulonglong)rw_lock_count_spin_loop); - protocol->store((ulonglong)rw_lock_count_spin_rounds); - protocol->store((ulonglong)rw_lock_count_os_wait); - protocol->store((ulonglong)rw_lock_count_os_yield); - protocol->store((ulonglong)rw_lock_wait_time/1000); - - if (protocol->write()) + buf2len= my_snprintf(buf2, sizeof(buf2), + "count=%lu, spin_waits=%lu, spin_rounds=%lu, " + "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", + rw_lock_count, rw_lock_count_spin_loop, + rw_lock_count_spin_rounds, + rw_lock_count_os_wait, rw_lock_count_os_yield, + rw_lock_wait_time/1000); + + if (stat_print(thd, innobase_hton.name, hton_name_len, + STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { DBUG_RETURN(1); } @@ -6701,10 +6722,23 @@ innodb_mutex_show_status( #ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER mutex_exit(&mutex_list_mutex); #endif - 
send_eof(thd); DBUG_RETURN(FALSE); } +bool innobase_show_status(THD* thd, stat_print_fn* stat_print, + enum ha_stat_type stat_type) +{ + switch (stat_type) { + case HA_ENGINE_STATUS: + return innodb_show_status(thd, stat_print); + case HA_ENGINE_MUTEX: + return innodb_mutex_show_status(thd, stat_print); + default: + return FALSE; + } +} + + /**************************************************************************** Handling the shared INNOBASE_SHARE structure that is needed to provide table locking. @@ -6841,7 +6875,7 @@ ha_innobase::store_lock( } else if (lock_type != TL_IGNORE) { - /* We set possible LOCK_X value in external_lock, not yet + /* We set possible LOCK_X value in external_lock, not yet here even if this would be SELECT ... FOR UPDATE */ prebuilt->select_lock_type = LOCK_NONE; @@ -6850,7 +6884,7 @@ ha_innobase::store_lock( if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { - /* Starting from 5.0.7, we weaken also the table locks + /* Starting from 5.0.7, we weaken also the table locks set at the start of a MySQL stored procedure call, just like we weaken the locks set at the start of an SQL statement. MySQL does set thd->in_lock_tables TRUE there, but in reality @@ -6873,36 +6907,26 @@ ha_innobase::store_lock( lock_type = TL_READ_NO_INSERT; } - /* If we are not doing a LOCK TABLE, DISCARD/IMPORT - TABLESPACE or TRUNCATE TABLE then allow multiple - writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ - < TL_WRITE_CONCURRENT_INSERT. - - We especially allow multiple writers if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) - (MySQL does have thd->in_lock_tables TRUE there). */ + /* If we are not doing a LOCK TABLE or DISCARD/IMPORT + TABLESPACE or TRUNCATE TABLE, then allow multiple writers */ - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT - && lock_type <= TL_WRITE) + if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && + lock_type <= TL_WRITE) && (!thd->in_lock_tables - || thd->lex->sql_command == SQLCOM_CALL) + || thd->lex->sql_command == SQLCOM_CALL) && !thd->tablespace_op && thd->lex->sql_command != SQLCOM_TRUNCATE - && thd->lex->sql_command != SQLCOM_OPTIMIZE - && thd->lex->sql_command != SQLCOM_CREATE_TABLE) { + && thd->lex->sql_command != SQLCOM_OPTIMIZE + && thd->lex->sql_command != SQLCOM_CREATE_TABLE) { - lock_type = TL_WRITE_ALLOW_WRITE; + lock_type = TL_WRITE_ALLOW_WRITE; } /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ... MySQL would use the lock TL_READ_NO_INSERT on t2, and that would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts to t2. Convert the lock to a normal read lock to allow - concurrent inserts to t2. - - We especially allow concurrent inserts if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) - (MySQL does have thd->in_lock_tables TRUE there). */ + concurrent inserts to t2. */ if (lock_type == TL_READ_NO_INSERT && (!thd->in_lock_tables @@ -6911,10 +6935,10 @@ ha_innobase::store_lock( lock_type = TL_READ; } - lock.type = lock_type; - } + lock.type = lock_type; + } - *to++= &lock; + *to++= &lock; return(to); } @@ -6983,7 +7007,7 @@ ha_innobase::innobase_read_and_init_auto_inc( } (void) extra(HA_EXTRA_KEYREAD); - index_init(table->s->next_number_index); + index_init(table->s->next_number_index, 1); /* Starting from 5.0.9, we use a consistent read to read the auto-inc column maximum value. 
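[Editor's note: illustration only, not part of the patch. The index_init() call above, with its new `sorted' argument, is the prelude to an index-only probe for the current auto-increment maximum. A simplified, hedged sketch of that read pattern follows; the real code reads into table->record[1] and extracts the value at a row offset, which this version skips for clarity.]

static int read_auto_inc_max_sketch(handler *h, TABLE *table, ulonglong *out)
{
  int err;
  (void) h->extra(HA_EXTRA_KEYREAD);              /* index-only access   */
  h->index_init(table->s->next_number_index, 1);  /* 1 = sorted by index */
  err= h->index_last(table->record[0]);           /* highest key value   */
  if (err == 0)
    *out= (ulonglong) table->found_next_number_field->val_int();
  h->index_end();
  (void) h->extra(HA_EXTRA_NO_KEYREAD);
  return err;
}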
This eliminates the spurious deadlocks caused
@@ -7527,4 +7551,23 @@ innobase_set_cursor_view(
(cursor_view_t*) curview);
}
-#endif /* HAVE_INNOBASE_DB */
+
+bool ha_innobase::check_if_incompatible_data(HA_CREATE_INFO *info,
+ uint table_changes)
+{
+ if (table_changes != IS_EQUAL_YES)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that auto_increment value was not changed */
+ if ((info->used_fields & HA_CREATE_USED_AUTO) &&
+ info->auto_increment_value != 0)
+ return COMPATIBLE_DATA_NO;
+
+ /* Check that row format didn't change */
+ if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
+ get_row_type() != info->row_type)
+ return COMPATIBLE_DATA_NO;
+
+ return COMPATIBLE_DATA_YES;
+}
+
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index fb93b2abb0e..b9e399509de 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -52,6 +52,7 @@ class ha_innobase: public handler
THR_LOCK_DATA lock;
INNOBASE_SHARE *share;
+ gptr alloc_ptr;
byte* upd_buff; /* buffer used in updates */
byte* key_val_buff; /* buffer used in converting search key values from MySQL format
@@ -61,6 +62,7 @@ class ha_innobase: public handler
two buffers */
ulong int_table_flags;
uint primary_key;
+ uint last_dup_key;
ulong start_of_scan; /* this is set to 1 when we are starting a table scan but have not yet fetched any row, else 0 */
@@ -68,6 +70,7 @@ class ha_innobase: public handler
ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX, or undefined */
uint num_write_row; /* number of write_row() calls */
+ ulong max_supported_row_length(const byte *buf);
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
const byte* record);
@@ -78,7 +81,7 @@ class ha_innobase: public handler
/* Init values for the class: */
public:
- ha_innobase(TABLE *table_arg);
+ ha_innobase(TABLE_SHARE *table_arg);
~ha_innobase() {}
/* Get the row type from the storage engine.
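[Editor's note: illustration only, not part of the patch. The check_if_incompatible_data() methods added in this change are how ALTER TABLE asks an engine whether the existing data files can be kept; a hedged sketch of the caller's side, with a hypothetical function name:]

static bool alter_can_skip_table_copy(handler *h, HA_CREATE_INFO *info,
                                      uint table_changes)
{
  /* COMPATIBLE_DATA_YES means the engine can keep its files as-is,
     so the server may apply the ALTER without copying the table. */
  return h->check_if_incompatible_data(info, table_changes) ==
         COMPATIBLE_DATA_YES;
}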
If this method returns @@ -119,9 +122,11 @@ class ha_innobase: public handler int write_row(byte * buf); int update_row(const byte * old_data, byte * new_data); int delete_row(const byte * buf); + bool was_semi_consistent_read(); + void try_semi_consistent_read(bool yes); void unlock_row(); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag); @@ -149,6 +154,16 @@ class ha_innobase: public handler int transactional_table_lock(THD *thd, int lock_type); int start_stmt(THD *thd, thr_lock_type lock_type); + int ha_retrieve_all_cols() + { + ha_set_all_bits_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_ALL_COLS); + } + int ha_retrieve_all_pk() + { + ha_set_primary_key_in_read_set(); + return extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + } void position(byte *record); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); @@ -193,11 +208,17 @@ class ha_innobase: public handler static ulonglong get_mysql_bin_log_pos(); bool primary_key_is_clustered() { return true; } int cmp_ref(const byte *ref1, const byte *ref2); + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); }; -extern struct show_var_st innodb_status_variables[]; -extern ulong innobase_fast_shutdown; +extern SHOW_VAR innodb_status_variables[]; +extern uint innobase_init_flags, innobase_lock_type; +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_cache_size, innobase_fast_shutdown; extern ulong innobase_large_page_size; +extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; +extern long innobase_lock_scan_time; extern long innobase_mirrored_log_groups, innobase_log_files_in_group; extern longlong innobase_buffer_pool_size, innobase_log_file_size; extern long innobase_log_buffer_size; @@ -231,11 +252,12 @@ extern ulong srv_n_free_tickets_to_enter; extern ulong srv_thread_sleep_delay; extern ulong srv_thread_concurrency; extern ulong srv_commit_concurrency; -extern ulong srv_flush_log_at_trx_commit; } +extern TYPELIB innobase_lock_typelib; + bool innobase_init(void); -bool innobase_end(void); +int innobase_end(ha_panic_function type); bool innobase_flush_logs(void); uint innobase_get_free_space(void); @@ -253,12 +275,10 @@ int innobase_commit_complete(void* trx_handle); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); #endif -int innobase_drop_database(char *path); -bool innodb_show_status(THD* thd); -bool innodb_mutex_show_status(THD* thd); -void innodb_export_status(void); +void innobase_drop_database(char *path); +bool innobase_show_status(THD* thd, stat_print_fn*, enum ha_stat_type); -void innobase_release_temporary_latches(THD *thd); +int innobase_release_temporary_latches(THD *thd); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); @@ -303,6 +323,9 @@ int innobase_rollback_by_xid( XID *xid); /* in : X/Open XA Transaction Identification */ +int innobase_xa_end(THD *thd); + + int innobase_repl_report_sent_binlog(THD *thd, char *log_file_name, my_off_t end_offset); diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index ff6431fa0f3..fba36450d81 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -27,8 +27,8 @@ #ifndef MASTER #include "../srclib/myisam/myisamdef.h" #else -#include "../myisam/myisamdef.h" -#include "../myisam/rt_index.h" +#include "../storage/myisam/myisamdef.h" +#include "../storage/myisam/rt_index.h" #endif ulong myisam_recover_options= 
HA_RECOVER_NONE;
@@ -50,9 +50,12 @@ TYPELIB myisam_stats_method_typelib= {
** MyISAM tables
*****************************************************************************/
+static handler *myisam_create_handler(TABLE_SHARE *table);
+
/* MyISAM handlerton */
handlerton myisam_hton= {
+ MYSQL_HANDLERTON_INTERFACE_VERSION,
"MyISAM",
SHOW_OPTION_YES,
"Default engine as of MySQL 3.23 with great performance",
@@ -77,9 +80,28 @@ handlerton myisam_hton= {
MyISAM doesn't support transactions and doesn't have transaction-dependent context: cursors can survive a commit.
*/
- HTON_CAN_RECREATE
+ myisam_create_handler, /* Create a new handler */
+ NULL, /* Drop a database */
+ mi_panic, /* Panic call */
+ NULL, /* Start Consistent Snapshot */
+ NULL, /* Flush logs */
+ NULL, /* Show status */
+ NULL, /* Partition flags */
+ NULL, /* Alter table flags */
+ NULL, /* Alter Tablespace */
+ NULL, /* Fill Files Table */
+ HTON_CAN_RECREATE,
+ NULL, /* binlog_func */
+ NULL /* binlog_log_query */
};
+
+static handler *myisam_create_handler(TABLE_SHARE *table)
+{
+ return new ha_myisam(table);
+}
+
+
// collect errors printed by mi_check routines
static void mi_check_print_msg(MI_CHECK *param, const char* msg_type,
@@ -160,7 +182,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...)
}
-ha_myisam::ha_myisam(TABLE *table_arg)
+ha_myisam::ha_myisam(TABLE_SHARE *table_arg)
:handler(&myisam_hton, table_arg), file(0),
int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPP_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
@@ -277,15 +299,42 @@ err:
}
#endif /* HAVE_REPLICATION */
+
+bool ha_myisam::check_if_locking_is_allowed(uint sql_command,
+ ulong type, TABLE *table,
+ uint count,
+ bool called_by_logger_thread)
+{
+ /*
+ To be able to open and lock system tables like 'mysql.proc' for
+ reading while some other tables are already opened and locked,
+ and to avoid deadlocks, we must disallow write-locking of these
+ tables together with any other tables.
+ */
+ if (table->s->system_table &&
+ table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
+ count != 1)
+ {
+ my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str,
+ table->s->table_name.str);
+ return FALSE;
+ }
+ return TRUE;
+}
+
/* Name is here without an extension */
int ha_myisam::open(const char *name, int mode, uint test_if_locked)
{
- if (!(file=mi_open(name, mode, test_if_locked)))
+ uint i;
+ if (!(file=mi_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
return (my_errno ?
my_errno : -1); if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) VOID(mi_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0)); + + if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_myisam_use_mmap) + VOID(mi_extra(file, HA_EXTRA_MMAP, 0)); + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) VOID(mi_extra(file, HA_EXTRA_WAIT_LOCK, 0)); @@ -293,6 +342,14 @@ int ha_myisam::open(const char *name, int mode, uint test_if_locked) int_table_flags|=HA_REC_NOT_IN_SEQ; if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags|=HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *)parser->plugin->info; + } return (0); } @@ -332,7 +389,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) myisamchk_init(¶m); param.thd = thd; param.op_name = "check"; - param.db_name= table->s->db; + param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag = check_opt->flags | T_CHECK | T_SILENT; param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; @@ -422,7 +479,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) myisamchk_init(¶m); param.thd = thd; param.op_name= "analyze"; - param.db_name= table->s->db; + param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); @@ -450,7 +507,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) HA_CHECK_OPT tmp_check_opt; char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name; + const char *table_name= table->s->table_name.str; int error; const char* errmsg; DBUG_ENTER("restore"); @@ -459,8 +516,8 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) MI_NAME_DEXT)) DBUG_RETURN(HA_ADMIN_INVALID); - if (my_copy(src_path, fn_format(dst_path, table->s->path, "", - MI_NAME_DEXT, 4), MYF(MY_WME))) + strxmov(dst_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME))) { error= HA_ADMIN_FAILED; errmsg= "Failed in my_copy (Error %d)"; @@ -477,8 +534,8 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) myisamchk_init(¶m); param.thd= thd; param.op_name= "restore"; - param.db_name= table->s->db; - param.table_name= table->s->table_name; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; param.testflag= 0; mi_check_print_error(¶m, errmsg, my_errno); DBUG_RETURN(error); @@ -490,7 +547,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) { char *backup_dir= thd->lex->backup_dir; char src_path[FN_REFLEN], dst_path[FN_REFLEN]; - const char *table_name= table->s->table_name; + const char *table_name= table->s->table_name.str; int error; const char *errmsg; DBUG_ENTER("ha_myisam::backup"); @@ -503,9 +560,8 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) goto err; } - if (my_copy(fn_format(src_path, table->s->path, "", reg_ext, - MY_UNPACK_FILENAME), - dst_path, + strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) { error = HA_ADMIN_FAILED; @@ -514,17 +570,16 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) } /* Change extension */ - if 
(!fn_format(dst_path, dst_path, "", MI_NAME_DEXT,
- MY_REPLACE_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH))
+ if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir,
+ MI_NAME_DEXT))
{
errmsg = "Failed in fn_format() for .MYD file (errno: %d)";
error = HA_ADMIN_INVALID;
goto err;
}
- if (my_copy(fn_format(src_path, table->s->path, "", MI_NAME_DEXT,
- MY_UNPACK_FILENAME),
- dst_path,
+ strxmov(src_path, table->s->normalized_path.str, MI_NAME_DEXT, NullS);
+ if (my_copy(src_path, dst_path,
MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE)))
{
errmsg = "Failed copying .MYD file (errno: %d)";
@@ -539,8 +594,8 @@ ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt)
myisamchk_init(&param);
param.thd= thd;
param.op_name= "backup";
- param.db_name= table->s->db;
- param.table_name= table->s->table_name;
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
param.testflag = 0;
mi_check_print_error(&param,errmsg, my_errno);
DBUG_RETURN(error);
@@ -631,7 +686,7 @@ int ha_myisam::repair(THD *thd, MI_CHECK &param, bool optimize)
ha_rows rows= file->state->records;
DBUG_ENTER("ha_myisam::repair");
- param.db_name= table->s->db;
+ param.db_name= table->s->db.str;
param.table_name= table->alias;
param.tmpfile_createflag = O_RDWR | O_TRUNC;
param.using_global_keycache = 1;
@@ -802,8 +857,8 @@ int ha_myisam::assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt)
myisamchk_init(&param);
param.thd= thd;
param.op_name= "assign_to_keycache";
- param.db_name= table->s->db;
- param.table_name= table->s->table_name;
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
param.testflag= 0;
mi_check_print_error(&param, errmsg);
}
@@ -870,8 +925,8 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt)
myisamchk_init(&param);
param.thd= thd;
param.op_name= "preload_keys";
- param.db_name= table->s->db;
- param.table_name= table->s->table_name;
+ param.db_name= table->s->db.str;
+ param.table_name= table->s->table_name.str;
param.testflag= 0;
mi_check_print_error(&param, errmsg);
DBUG_RETURN(error);
@@ -1125,8 +1180,8 @@ bool ha_myisam::check_and_repair(THD *thd)
old_query= thd->query;
old_query_length= thd->query_length;
pthread_mutex_lock(&LOCK_thread_count);
- thd->query= (char*) table->s->table_name;
- thd->query_length= (uint32) strlen(table->s->table_name);
+ thd->query= table->s->table_name.str;
+ thd->query_length= table->s->table_name.length;
pthread_mutex_unlock(&LOCK_thread_count);
if ((marked_crashed= mi_is_crashed(file)) || check(thd, &check_opt))
@@ -1313,6 +1368,10 @@ void ha_myisam::info(uint flag)
ref_length= info.reflength;
share->db_options_in_use= info.options;
block_size= myisam_block_size;
+
+ /* Update share */
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_lock(&share->mutex);
share->keys_in_use.set_prefix(share->keys);
share->keys_in_use.intersect_extended(info.key_map);
share->keys_for_keyread.intersect(share->keys_in_use);
@@ -1321,19 +1380,18 @@ void ha_myisam::info(uint flag)
memcpy((char*) table->key_info[0].rec_per_key, (char*) info.rec_per_key, sizeof(table->key_info[0].rec_per_key)*share->key_parts);
- raid_type= info.raid_type;
- raid_chunks= info.raid_chunks;
- raid_chunksize= info.raid_chunksize;
+ if (share->tmp_table == NO_TMP_TABLE)
+ pthread_mutex_unlock(&share->mutex);
/* Set data_file_name and index_file_name to point at the symlink value if table is symlinked (i.e., the real name is not the same as the generated name) */
- data_file_name=index_file_name=0;
- fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
+
data_file_name= index_file_name= 0; + fn_format(name_buff, file->filename, "", MI_NAME_DEXT, MY_APPEND_EXT); if (strcmp(name_buff, info.data_file_name)) data_file_name=info.data_file_name; - strmov(fn_ext(name_buff),MI_NAME_IEXT); + fn_format(name_buff, file->filename, "", MI_NAME_IEXT, MY_APPEND_EXT); if (strcmp(name_buff, info.index_file_name)) index_file_name=info.index_file_name; } @@ -1401,12 +1459,6 @@ void ha_myisam::update_create_info(HA_CREATE_INFO *create_info) { create_info->auto_increment_value=auto_increment_value; } - if (!(create_info->used_fields & HA_CREATE_USED_RAID)) - { - create_info->raid_type= raid_type; - create_info->raid_chunks= raid_chunks; - create_info->raid_chunksize= raid_chunksize; - } create_info->data_file_name=data_file_name; create_info->index_file_name=index_file_name; } @@ -1424,7 +1476,7 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, MI_KEYDEF *keydef; MI_COLUMNDEF *recinfo,*recinfo_pos; HA_KEYSEG *keyseg; - TABLE_SHARE *share= table->s; + TABLE_SHARE *share= table_arg->s; uint options= share->db_options_in_use; DBUG_ENTER("ha_myisam::create"); @@ -1442,6 +1494,8 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, pos=table_arg->key_info; for (i=0; i < share->keys ; i++, pos++) { + if (pos->flags & HA_USES_PARSER) + create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER; keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : @@ -1596,11 +1650,6 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, (ulonglong) 0); create_info.data_file_length= ((ulonglong) share->max_rows * share->avg_row_length); - create_info.raid_type=info->raid_type; - create_info.raid_chunks= (info->raid_chunks ? info->raid_chunks : - RAID_DEFAULT_CHUNKS); - create_info.raid_chunksize= (info->raid_chunksize ? 
info->raid_chunksize : - RAID_DEFAULT_CHUNKSIZE); create_info.data_file_name= info->data_file_name; create_info.index_file_name= info->index_file_name; @@ -1614,7 +1663,7 @@ int ha_myisam::create(const char *name, register TABLE *table_arg, create_flags|= HA_CREATE_DELAY_KEY_WRITE; /* TODO: Check that the following fn_format is really needed */ - error=mi_create(fn_format(buff,name,"","",2+4), + error=mi_create(fn_format(buff,name,"","",MY_UNPACK_FILENAME|MY_APPEND_EXT), share->keys,keydef, (uint) (recinfo_pos-recinfo), recinfo, 0, (MI_UNIQUEDEF*) 0, @@ -1718,3 +1767,22 @@ uint ha_myisam::checksum() const return (uint)file->state->checksum; } + +bool ha_myisam::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != auto_increment_value || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + table_changes == IS_EQUAL_NO) + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index ca684463311..86efed27478 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -43,7 +43,7 @@ class ha_myisam: public handler int repair(THD *thd, MI_CHECK ¶m, bool optimize); public: - ha_myisam(TABLE *table_arg); + ha_myisam(TABLE_SHARE *table_arg); ~ha_myisam() {} const char *table_type() const { return "MyISAM"; } const char *index_type(uint key_number); @@ -51,7 +51,7 @@ class ha_myisam: public handler ulong table_flags() const { return int_table_flags; } ulong index_flags(uint inx, uint part, bool all_parts) const { - return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? 
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER | HA_KEYREAD_ONLY); } @@ -60,6 +60,10 @@ class ha_myisam: public handler uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } uint checksum() const; + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_logger_thread); int open(const char *name, int mode, uint test_if_locked); int close(void); int write_row(byte * buf); @@ -123,6 +127,7 @@ class ha_myisam: public handler int backup(THD* thd, HA_CHECK_OPT* check_opt); int assign_to_keycache(THD* thd, HA_CHECK_OPT* check_opt); int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); #ifdef HAVE_REPLICATION int dump(THD* thd, int fd); int net_read_dump(NET* net); diff --git a/sql/ha_myisammrg.cc b/sql/ha_myisammrg.cc index 9780f163634..ec015c5e320 100644 --- a/sql/ha_myisammrg.cc +++ b/sql/ha_myisammrg.cc @@ -25,16 +25,19 @@ #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif /***************************************************************************** ** MyISAM MERGE tables *****************************************************************************/ +static handler *myisammrg_create_handler(TABLE_SHARE *table); + /* MyISAM MERGE handlerton */ handlerton myisammrg_hton= { + MYSQL_HANDLERTON_INTERFACE_VERSION, "MRG_MYISAM", SHOW_OPTION_YES, "Collection of identical MyISAM tables", @@ -55,11 +58,28 @@ handlerton myisammrg_hton= { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_CAN_RECREATE + myisammrg_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + myrg_panic, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill Files Table */ + HTON_CAN_RECREATE, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; +static handler *myisammrg_create_handler(TABLE_SHARE *table) +{ + return new ha_myisammrg(table); +} -ha_myisammrg::ha_myisammrg(TABLE *table_arg) + +ha_myisammrg::ha_myisammrg(TABLE_SHARE *table_arg) :handler(&myisammrg_hton, table_arg), file(0) {} @@ -91,13 +111,14 @@ int ha_myisammrg::open(const char *name, int mode, uint test_if_locked) char name_buff[FN_REFLEN]; DBUG_PRINT("info", ("ha_myisammrg::open")); - if (!(file=myrg_open(fn_format(name_buff,name,"","",2 | 4), mode, - test_if_locked))) + if (!(file=myrg_open(fn_format(name_buff,name,"","", + MY_UNPACK_FILENAME|MY_APPEND_EXT), + mode, test_if_locked))) { DBUG_PRINT("info", ("ha_myisammrg::open exit %d", my_errno)); return (my_errno ? 
my_errno : -1); } - DBUG_PRINT("info", ("ha_myisammrg::open myrg_extrafunc...")) + DBUG_PRINT("info", ("ha_myisammrg::open myrg_extrafunc...")); myrg_extrafunc(file, query_cache_invalidate_by_MyISAM_filename_ref); if (!(test_if_locked == HA_OPEN_WAIT_IF_LOCKED || test_if_locked == HA_OPEN_ABORT_IF_LOCKED)) @@ -286,7 +307,6 @@ void ha_myisammrg::info(uint flag) errkey = info.errkey; table->s->keys_in_use.set_prefix(table->s->keys); table->s->db_options_in_use= info.options; - table->s->is_view= 1; mean_rec_length= info.reclength; /* @@ -460,9 +480,9 @@ int ha_myisammrg::create(const char *name, register TABLE *form, for (pos= table_names; tables; tables= tables->next_local) { const char *table_name; - TABLE **tbl= 0; + TABLE *tbl= 0; if (create_info->options & HA_LEX_CREATE_TMP_TABLE) - tbl= find_temporary_table(thd, tables->db, tables->table_name); + tbl= find_temporary_table(thd, tables); if (!tbl) { /* @@ -476,8 +496,8 @@ int ha_myisammrg::create(const char *name, register TABLE *form, This means that it might not be possible to move the DATADIR of an embedded server without changing the paths in the .MRG file. */ - uint length= my_snprintf(buff, FN_REFLEN, "%s/%s/%s", mysql_data_home, - tables->db, tables->table_name); + uint length= build_table_filename(buff, sizeof(buff), + tables->db, tables->table_name, ""); /* If a MyISAM table is in the same directory as the MERGE table, we use the table name without a path. This means that the @@ -491,11 +511,13 @@ int ha_myisammrg::create(const char *name, register TABLE *form, DBUG_RETURN(HA_ERR_OUT_OF_MEM); } else - table_name= (*tbl)->s->path; + table_name= tbl->s->path.str; *pos++= table_name; } *pos=0; - DBUG_RETURN(myrg_create(fn_format(buff,name,"","",2+4+16), + DBUG_RETURN(myrg_create(fn_format(buff,name,"","", + MY_RESOLVE_SYMLINKS| + MY_UNPACK_FILENAME|MY_APPEND_EXT), table_names, create_info->merge_insert_method, (my_bool) 0)); @@ -507,6 +529,7 @@ void ha_myisammrg::append_create_info(String *packet) const char *current_db; uint db_length; THD *thd= current_thd; + MYRG_TABLE *open_table, *first; if (file->merge_insert_method != MERGE_INSERT_DISABLED) { @@ -514,10 +537,9 @@ void ha_myisammrg::append_create_info(String *packet) packet->append(get_type(&merge_insert_method,file->merge_insert_method-1)); } packet->append(STRING_WITH_LEN(" UNION=(")); - MYRG_TABLE *open_table,*first; - current_db= table->s->db; - db_length= (uint) strlen(current_db); + current_db= table->s->db.str; + db_length= table->s->db.length; for (first=open_table=file->open_tables ; open_table != file->end_table ; @@ -539,3 +561,14 @@ void ha_myisammrg::append_create_info(String *packet) } packet->append(')'); } + + +bool ha_myisammrg::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + /* + For myisammrg, we should always re-generate the mapping file as this + is trivial to do + */ + return COMPATIBLE_DATA_NO; +} diff --git a/sql/ha_myisammrg.h b/sql/ha_myisammrg.h index c762b7c286e..4327b1c17b9 100644 --- a/sql/ha_myisammrg.h +++ b/sql/ha_myisammrg.h @@ -28,7 +28,7 @@ class ha_myisammrg: public handler MYRG_INFO *file; public: - ha_myisammrg(TABLE *table_arg); + ha_myisammrg(TABLE_SHARE *table_arg); ~ha_myisammrg() {} const char *table_type() const { return "MRG_MyISAM"; } const char **bas_ext() const; @@ -37,11 +37,12 @@ class ha_myisammrg: public handler { return (HA_REC_NOT_IN_SEQ | HA_AUTO_PART_KEY | HA_READ_RND_SAME | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | HA_FILE_BASED | - HA_CAN_INSERT_DELAYED | HA_ANY_INDEX_MAY_BE_UNIQUE); + 
HA_CAN_INSERT_DELAYED | HA_ANY_INDEX_MAY_BE_UNIQUE |
+ HA_NO_COPY_ON_ALTER);
}
ulong index_flags(uint inx, uint part, bool all_parts) const
{
- return ((table->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
+ return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ?
0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER | HA_KEYREAD_ONLY);
}
@@ -82,4 +83,5 @@ class ha_myisammrg: public handler
void update_create_info(HA_CREATE_INFO *create_info);
void append_create_info(String *packet);
MYRG_INFO *myrg_info() { return file; }
+ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes);
};
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index bc790ae0138..ff8a1221052 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -26,53 +26,78 @@
#include "mysql_priv.h"
-#ifdef HAVE_NDBCLUSTER_DB
#include <my_dir.h>
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbScanFilter.hpp>
+#include <../util/Bitmask.hpp>
+#include <ndbapi/NdbIndexStat.hpp>
+
+#include "ha_ndbcluster_binlog.h"
+#include "ha_ndbcluster_tables.h"
+
+#ifdef ndb_dynamite
+#undef assert
+#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0)
+#endif
// options from mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
extern const char *opt_ndbcluster_connectstring;
+const char *ndb_distribution_names[]= {"KEYHASH", "LINHASH", NullS};
+TYPELIB ndb_distribution_typelib= { array_elements(ndb_distribution_names)-1,
+ "", ndb_distribution_names, NULL };
+const char *opt_ndb_distribution= ndb_distribution_names[ND_KEYHASH];
+enum ndb_distribution opt_ndb_distribution_id= ND_KEYHASH;
+
// Default value for parallelism
static const int parallelism= 0;
// Default value for max number of transactions
// creatable against NDB from this handler
-static const int max_transactions= 2;
-
-static const char *ha_ndb_ext=".ndb";
+static const int max_transactions= 3; // should really be 2, but one transaction too many is allocated when LOCK TABLE is used
-static int ndbcluster_close_connection(THD *thd);
-static int ndbcluster_commit(THD *thd, bool all);
-static int ndbcluster_rollback(THD *thd, bool all);
+static uint ndbcluster_partition_flags();
+static uint ndbcluster_alter_table_flags(uint flags);
+static bool ndbcluster_init(void);
+static int ndbcluster_end(ha_panic_function flag);
+static bool ndbcluster_show_status(THD*,stat_print_fn *,enum ha_stat_type);
+static int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info);
+static int ndbcluster_fill_files_table(THD *thd, TABLE_LIST *tables, COND *cond);
handlerton ndbcluster_hton = {
+ MYSQL_HANDLERTON_INTERFACE_VERSION,
"ndbcluster",
SHOW_OPTION_YES,
- "Clustered, fault-tolerant, memory-based tables",
+ "Clustered, fault-tolerant tables",
DB_TYPE_NDBCLUSTER,
ndbcluster_init,
- 0, /* slot */
- 0, /* savepoint size */
- ndbcluster_close_connection,
- NULL, /* savepoint_set */
- NULL, /* savepoint_rollback */
- NULL, /* savepoint_release */
- ndbcluster_commit,
- ndbcluster_rollback,
- NULL, /* prepare */
- NULL, /* recover */
- NULL, /* commit_by_xid */
- NULL, /* rollback_by_xid */
- NULL, /* create_cursor_read_view */
- NULL, /* set_cursor_read_view */
- NULL, /* close_cursor_read_view */
- HTON_NO_FLAGS
+ ~(uint)0, /* slot */
};
+static handler *ndbcluster_create_handler(TABLE_SHARE *table)
+{
+ return new
ha_ndbcluster(table); +} + +static uint ndbcluster_partition_flags() +{ + return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | + HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION); +} + +static uint ndbcluster_alter_table_flags(uint flags) +{ + if (flags & ALTER_DROP_PARTITION) + return 0; + else + return (HA_ONLINE_ADD_INDEX | HA_ONLINE_DROP_INDEX | + HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX | + HA_PARTITION_FUNCTION_SUPPORTED); + +} + #define NDB_FAILED_AUTO_INCREMENT ~(Uint64)0 #define NDB_AUTO_INCREMENT_RETRIES 10 @@ -88,37 +113,40 @@ handlerton ndbcluster_hton = { DBUG_RETURN(ndb_to_mysql_error(&tmp)); \ } -// Typedefs for long names -typedef NdbDictionary::Column NDBCOL; -typedef NdbDictionary::Table NDBTAB; -typedef NdbDictionary::Index NDBINDEX; -typedef NdbDictionary::Dictionary NDBDICT; +#define ERR_BREAK(err, code) \ +{ \ + const NdbError& tmp= err; \ + ERR_PRINT(tmp); \ + code= ndb_to_mysql_error(&tmp); \ + break; \ +} -bool ndbcluster_inited= FALSE; +static int ndbcluster_inited= 0; +int ndbcluster_util_inited= 0; static Ndb* g_ndb= NULL; -static Ndb_cluster_connection* g_ndb_cluster_connection= NULL; +Ndb_cluster_connection* g_ndb_cluster_connection= NULL; +unsigned char g_node_id_map[max_ndb_nodes]; // Handler synchronization pthread_mutex_t ndbcluster_mutex; // Table lock handling -static HASH ndbcluster_open_tables; +HASH ndbcluster_open_tables; static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); -static NDB_SHARE *get_share(const char *table_name); -static void free_share(NDB_SHARE *share); - -static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len); -static int unpackfrm(const void **data, uint *len, - const void* pack_data); +#ifdef HAVE_NDB_BINLOG +static int rename_share(NDB_SHARE *share, const char *new_key); +#endif +static void ndb_set_fragmentation(NDBTAB &tab, TABLE *table, uint pk_len); static int ndb_get_table_statistics(Ndb*, const char *, struct Ndb_statistics *); + // Util thread variables -static pthread_t ndb_util_thread; +pthread_t ndb_util_thread; pthread_mutex_t LOCK_ndb_util_thread; pthread_cond_t COND_ndb_util_thread; pthread_handler_t ndb_util_thread_func(void *arg); @@ -147,7 +175,7 @@ static long ndb_cluster_node_id= 0; static const char * ndb_connected_host= 0; static long ndb_connected_port= 0; static long ndb_number_of_replicas= 0; -static long ndb_number_of_storage_nodes= 0; +long ndb_number_of_storage_nodes= 0; static int update_status_variables(Ndb_cluster_connection *c) { @@ -159,7 +187,7 @@ static int update_status_variables(Ndb_cluster_connection *c) return 0; } -struct show_var_st ndb_status_variables[]= { +SHOW_VAR ndb_status_variables[]= { {"cluster_node_id", (char*) &ndb_cluster_node_id, SHOW_LONG}, {"connected_host", (char*) &ndb_connected_host, SHOW_CHAR_PTR}, {"connected_port", (char*) &ndb_connected_port, SHOW_LONG}, @@ -172,61 +200,63 @@ struct show_var_st ndb_status_variables[]= { Error handling functions */ -struct err_code_mapping -{ - int ndb_err; - int my_err; - int show_warning; -}; +/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */ -static const err_code_mapping err_map[]= +static int ndb_to_mysql_error(const NdbError *ndberr) { - { 626, HA_ERR_KEY_NOT_FOUND, 0 }, - { 630, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 893, HA_ERR_FOUND_DUPP_KEY, 0 }, - { 721, HA_ERR_TABLE_EXIST, 1 }, - { 4244, HA_ERR_TABLE_EXIST, 1 }, - - { 709, HA_ERR_NO_SUCH_TABLE, 0 }, - - { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 }, - { 274, 
HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
- { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
- { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
- { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
+ /* read the mysql mapped error code */
+ int error= ndberr->mysql_code;
- { 623, HA_ERR_RECORD_FILE_FULL, 1 },
- { 624, HA_ERR_RECORD_FILE_FULL, 1 },
- { 625, HA_ERR_RECORD_FILE_FULL, 1 },
- { 826, HA_ERR_RECORD_FILE_FULL, 1 },
- { 827, HA_ERR_RECORD_FILE_FULL, 1 },
- { 832, HA_ERR_RECORD_FILE_FULL, 1 },
-
- { 284, HA_ERR_TABLE_DEF_CHANGED, 0 },
-
- { 0, 1, 0 },
-
- { -1, -1, 1 }
-};
+ switch (error)
+ {
+ /* errors for which we do not add warnings, just return mapped error code
+ */
+ case HA_ERR_NO_SUCH_TABLE:
+ case HA_ERR_KEY_NOT_FOUND:
+ case HA_ERR_FOUND_DUPP_KEY:
+ return error;
+
+ /* Mapping missing, go with the ndb error code */
+ case -1:
+ error= ndberr->code;
+ break;
+ /* Mapping exists, go with the mapped code */
+ default:
+ break;
+ }
-static int ndb_to_mysql_error(const NdbError *err)
-{
- uint i;
- for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++);
- if (err_map[i].show_warning)
- {
- // Push the NDB error message as warning
+ /*
+ Push the NDB error message as warning
+ - Used to be able to use SHOW WARNINGS to get more info on what the error is
+ - Used by replication to see if the error was temporary
+ */
+ if (ndberr->status == NdbError::TemporaryError)
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
- ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
- err->code, err->message, "NDB");
- }
- if (err_map[i].my_err == -1)
- return err->code;
- return err_map[i].my_err;
+ ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
+ ndberr->code, ndberr->message, "NDB");
+ else
+ push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
+ ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
+ ndberr->code, ndberr->message, "NDB");
+ return error;
}
+int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans)
+{
+ int res= trans->execute(NdbTransaction::NoCommit,
+ NdbTransaction::AO_IgnoreError,
+ h->m_force_send);
+ if (res == 0)
+ return 0;
+ const NdbError &err= trans->getNdbError();
+ if (err.classification != NdbError::ConstraintViolation &&
+ err.classification != NdbError::NoDataFound)
+ return res;
+
+ return 0;
+}
inline
int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans)
@@ -236,9 +266,11 @@ int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans)
if (m_batch_execute)
return 0;
#endif
- return trans->execute(NdbTransaction::NoCommit,
- NdbTransaction::AbortOnError,
- h->m_force_send);
+ return h->m_ignore_no_key ?
+ execute_no_commit_ignore_no_key(h,trans) : + trans->execute(NdbTransaction::NoCommit, + NdbTransaction::AbortOnError, + h->m_force_send); } inline @@ -291,6 +323,7 @@ Thd_ndb::Thd_ndb() all= NULL; stmt= NULL; error= 0; + options= 0; } Thd_ndb::~Thd_ndb() @@ -317,14 +350,6 @@ Thd_ndb::~Thd_ndb() } inline -Thd_ndb * -get_thd_ndb(THD *thd) { return (Thd_ndb *) thd->ha_data[ndbcluster_hton.slot]; } - -inline -void -set_thd_ndb(THD *thd, Thd_ndb *thd_ndb) { thd->ha_data[ndbcluster_hton.slot]= thd_ndb; } - -inline Ndb *ha_ndbcluster::get_ndb() { return get_thd_ndb(current_thd)->ndb; @@ -343,7 +368,7 @@ struct Ndb_local_table_statistics { void ha_ndbcluster::set_rec_per_key() { DBUG_ENTER("ha_ndbcluster::get_status_const"); - for (uint i=0 ; i < table->s->keys ; i++) + for (uint i=0 ; i < table_share->keys ; i++) { table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1; } @@ -441,34 +466,60 @@ void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd) # The mapped error code */ -void ha_ndbcluster::invalidate_dictionary_cache(bool global) +int +ha_ndbcluster::invalidate_dictionary_cache(TABLE_SHARE *share, Ndb *ndb, + const char *dbname, const char *tabname, + bool global) { - NDBDICT *dict= get_ndb()->getDictionary(); + NDBDICT *dict= ndb->getDictionary(); DBUG_ENTER("invalidate_dictionary_cache"); - DBUG_PRINT("info", ("invalidating %s", m_tabname)); + DBUG_PRINT("info", ("invalidating %s", tabname)); + +#ifdef HAVE_NDB_BINLOG + char key[FN_REFLEN]; + build_table_filename(key, sizeof(key), dbname, tabname, ""); + DBUG_PRINT("info", ("Getting ndbcluster mutex")); + pthread_mutex_lock(&ndbcluster_mutex); + NDB_SHARE *ndb_share= (NDB_SHARE*)hash_search(&ndbcluster_open_tables, + (byte*) key, strlen(key)); + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_PRINT("info", ("Released ndbcluster mutex")); + // Only binlog_thread is allowed to globally invalidate a table + if (global && ndb_share && ndb_share->op && (current_thd != injector_thd)) + DBUG_RETURN(1); +#endif if (global) { - const NDBTAB *tab= dict->getTable(m_tabname); + const NDBTAB *tab= dict->getTable(tabname); if (!tab) - DBUG_VOID_RETURN; + DBUG_RETURN(1); if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) { // Global cache has already been invalidated - dict->removeCachedTable(m_tabname); + dict->removeCachedTable(tabname); global= FALSE; } else - dict->invalidateTable(m_tabname); + dict->invalidateTable(tabname); } else - dict->removeCachedTable(m_tabname); - table->s->version=0L; /* Free when thread is ready */ + dict->removeCachedTable(tabname); + share->version=0L; /* Free when thread is ready */ + DBUG_RETURN(0); +} + +void ha_ndbcluster::invalidate_dictionary_cache(bool global) +{ + NDBDICT *dict= get_ndb()->getDictionary(); + if (invalidate_dictionary_cache(table_share, get_ndb(), m_dbname, m_tabname, global)) + return; /* Invalidate indexes */ - for (uint i= 0; i < table->s->keys; i++) + for (uint i= 0; i < table_share->keys; i++) { NDBINDEX *index = (NDBINDEX *) m_index[i].index; NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index; + if (!index && !unique_index) continue; NDB_INDEX_TYPE idx_type= m_index[i].type; switch (idx_type) { @@ -495,7 +546,6 @@ void ha_ndbcluster::invalidate_dictionary_cache(bool global) break; } } - DBUG_VOID_RETURN; } int ha_ndbcluster::ndb_err(NdbTransaction *trans) @@ -521,7 +571,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans) { err= dict->getNdbError(); DBUG_PRINT("info", ("Table not found, error: %d", err.code)); - if (err.code != 709) + if (err.code != 709 
&& err.code != 723)
DBUG_RETURN(1);
}
DBUG_PRINT("info", ("Table exists but must have changed"));
@@ -536,7 +586,7 @@ int ha_ndbcluster::ndb_err(NdbTransaction *trans)
if (res == HA_ERR_FOUND_DUPP_KEY)
{
if (m_rows_to_insert == 1)
- m_dupkey= table->s->primary_key;
+ m_dupkey= table_share->primary_key;
else
{
/* We are batching inserts, offending key is not available */
@@ -621,8 +671,7 @@ bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op,
uint fieldnr, const byte *field_ptr)
{
DBUG_ENTER("set_hidden_key");
- DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr,
- NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0);
+ DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0);
}
@@ -652,14 +701,15 @@ int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field,
*/
int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field,
- uint fieldnr, bool *set_blob_value)
+ uint fieldnr, int row_offset,
+ bool *set_blob_value)
{
- const byte* field_ptr= field->ptr;
- uint32 pack_len= field->pack_length();
+ const byte* field_ptr= field->ptr + row_offset;
+ uint32 pack_len= field->pack_length();
DBUG_ENTER("set_ndb_value");
- DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s",
+ DBUG_PRINT("enter", ("%d: %s type: %u len=%d is_null=%s",
fieldnr, field->field_name, field->type(),
- pack_len, field->is_null()?"Y":"N"));
+ pack_len, field->is_null(row_offset) ? "Y" : "N"));
DBUG_DUMP("value", (char*) field_ptr, pack_len);
DBUG_ASSERT(ndb_supported_type(field->type()));
@@ -670,7 +720,7 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field,
{
pack_len= sizeof(empty_field);
field_ptr= (byte *)&empty_field;
- if (field->is_null())
+ if (field->is_null(row_offset))
empty_field= 0;
else
empty_field= 1;
@@ -679,13 +729,14 @@ {
if (field->type() != MYSQL_TYPE_BIT)
{
- if (field->is_null())
+ if (field->is_null(row_offset))
+ {
+ DBUG_PRINT("info", ("field is NULL"));
// Set value to NULL
- DBUG_RETURN((ndb_op->setValue(fieldnr,
- (char*)NULL, pack_len) != 0));
+ DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
+ }
// Common implementation for most field types
- DBUG_RETURN(ndb_op->setValue(fieldnr,
- (char*)field_ptr, pack_len) != 0);
+ DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0);
}
else // if (field->type() == MYSQL_TYPE_BIT)
{
@@ -694,26 +745,25 @@
// Round up bit field length to nearest word boundary
pack_len= ((pack_len + 3) >> 2) << 2;
DBUG_ASSERT(pack_len <= 8);
- if (field->is_null())
+ if (field->is_null(row_offset))
// Set value to NULL
- DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0));
+ DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0));
DBUG_PRINT("info", ("bit field"));
DBUG_DUMP("value", (char*)&bits, pack_len);
#ifdef WORDS_BIGENDIAN
if (pack_len < 5)
{
- DBUG_RETURN(ndb_op->setValue(fieldnr,
- ((char*)&bits)+4, pack_len) != 0);
+ DBUG_RETURN(ndb_op->setValue(fieldnr, ((char*)&bits)+4) != 0);
}
#endif
- DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0);
+ DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0);
}
}
// Blob type
NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
if (ndb_blob != NULL)
{
- if (field->is_null())
+ if (field->is_null(row_offset))
DBUG_RETURN(ndb_blob->setNull() != 0);
Field_blob *field_blob= (Field_blob*)field;
@@ -763,10 +813,20 @@ int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
if (ndb_blob->blobsNextBlob() != NULL)
DBUG_RETURN(0); ha_ndbcluster *ha= (ha_ndbcluster *)arg; - DBUG_RETURN(ha->get_ndb_blobs_value(ndb_blob)); + int ret= get_ndb_blobs_value(ha->table, ha->m_value, + ha->m_blobs_buffer, ha->m_blobs_buffer_size, + 0); + DBUG_RETURN(ret); } -int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob) +/* + This routine is shared by injector. There is no common blobs buffer + so the buffer and length are passed by reference. Injector also + passes a record pointer diff. + */ +int get_ndb_blobs_value(TABLE* table, NdbValue* value_array, + byte*& buffer, uint& buffer_size, + my_ptrdiff_t ptrdiff) { DBUG_ENTER("get_ndb_blobs_value"); @@ -778,41 +838,63 @@ int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob) for (uint i= 0; i < table->s->fields; i++) { Field *field= table->field[i]; - NdbValue value= m_value[i]; - if (value.ptr != NULL && (field->flags & BLOB_FLAG)) + NdbValue value= value_array[i]; + if (! (field->flags & BLOB_FLAG)) + continue; + if (value.blob == NULL) { - Field_blob *field_blob= (Field_blob *)field; - NdbBlob *ndb_blob= value.blob; - Uint64 blob_len= 0; - if (ndb_blob->getLength(blob_len) != 0) - DBUG_RETURN(-1); + DBUG_PRINT("info",("[%u] skipped", i)); + continue; + } + Field_blob *field_blob= (Field_blob *)field; + NdbBlob *ndb_blob= value.blob; + int isNull; + if (ndb_blob->getNull(isNull) != 0) + ERR_RETURN(ndb_blob->getNdbError()); + if (isNull == 0) { + Uint64 len64= 0; + if (ndb_blob->getLength(len64) != 0) + ERR_RETURN(ndb_blob->getNdbError()); // Align to Uint64 - uint32 blob_size= blob_len; - if (blob_size % 8 != 0) - blob_size+= 8 - blob_size % 8; + uint32 size= len64; + if (size % 8 != 0) + size+= 8 - size % 8; if (loop == 1) { - char *buf= m_blobs_buffer + offset; + char *buf= buffer + offset; uint32 len= 0xffffffff; // Max uint32 - DBUG_PRINT("value", ("read blob ptr=%x len=%u", - (UintPtr)buf, (uint)blob_len)); if (ndb_blob->readData(buf, len) != 0) - DBUG_RETURN(-1); - DBUG_ASSERT(len == blob_len); + ERR_RETURN(ndb_blob->getNdbError()); + DBUG_PRINT("info", ("[%u] offset=%u buf=%p len=%u [ptrdiff=%d]", + i, offset, buf, len, (int)ptrdiff)); + DBUG_ASSERT(len == len64); + // Ugly hack assumes only ptr needs to be changed + field_blob->ptr+= ptrdiff; field_blob->set_ptr(len, buf); + field_blob->ptr-= ptrdiff; } - offset+= blob_size; + offset+= size; + } + else if (loop == 1) // undefined or null + { + // have to set length even in this case + char *buf= buffer + offset; // or maybe NULL + uint32 len= 0; + field_blob->ptr+= ptrdiff; + field_blob->set_ptr(len, buf); + field_blob->ptr-= ptrdiff; + DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull)); } } - if (loop == 0 && offset > m_blobs_buffer_size) + if (loop == 0 && offset > buffer_size) { - my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); - m_blobs_buffer_size= 0; - DBUG_PRINT("value", ("allocate blobs buffer size %u", offset)); - m_blobs_buffer= my_malloc(offset, MYF(MY_WME)); - if (m_blobs_buffer == NULL) + my_free(buffer, MYF(MY_ALLOW_ZERO_PTR)); + buffer_size= 0; + DBUG_PRINT("info", ("allocate blobs buffer size %u", offset)); + buffer= my_malloc(offset, MYF(MY_WME)); + if (buffer == NULL) DBUG_RETURN(-1); - m_blobs_buffer_size= offset; + buffer_size= offset; } } DBUG_RETURN(0); @@ -877,21 +959,18 @@ int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, /* Check if any set or get of blob value in current query. 
*/ -bool ha_ndbcluster::uses_blob_value(bool all_fields) +bool ha_ndbcluster::uses_blob_value() { - if (table->s->blob_fields == 0) + if (table_share->blob_fields == 0) return FALSE; - if (all_fields) - return TRUE; { - uint no_fields= table->s->fields; + uint no_fields= table_share->fields; int i; - THD *thd= current_thd; // They always put blobs at the end.. for (i= no_fields - 1; i >= 0; i--) { - Field *field= table->field[i]; - if (thd->query_id == field->query_id) + if ((m_write_op && ha_get_bit_in_write_set(i+1)) || + (!m_write_op && ha_get_bit_in_read_set(i+1))) { return TRUE; } @@ -907,8 +986,25 @@ bool ha_ndbcluster::uses_blob_value(bool all_fields) IMPLEMENTATION - check that frm-file on disk is equal to frm-file of table accessed in NDB + + RETURN + 0 ok + -2 Meta data has changed; Re-read data and try again */ +int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, + uint pack_length) +{ + DBUG_ENTER("cmp_frm"); + /* + Compare FrmData in NDB with frm file from disk. + */ + if ((pack_length != ndbtab->getFrmLength()) || + (memcmp(pack_data, ndbtab->getFrmData(), pack_length))) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + int ha_ndbcluster::get_metadata(const char *path) { Ndb *ndb= get_ndb(); @@ -916,7 +1012,6 @@ int ha_ndbcluster::get_metadata(const char *path) const NDBTAB *tab; int error; bool invalidating_ndb_table= FALSE; - DBUG_ENTER("get_metadata"); DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path)); @@ -946,8 +1041,8 @@ int ha_ndbcluster::get_metadata(const char *path) DBUG_RETURN(1); } - if ((pack_length != tab->getFrmLength()) || - (memcmp(pack_data, tab->getFrmData(), pack_length))) + if (get_ndb_share_state(m_share) != NSS_ALTERED + && cmp_frm(tab, pack_data, pack_length)) { if (!invalidating_ndb_table) { @@ -958,12 +1053,12 @@ int ha_ndbcluster::get_metadata(const char *path) else { DBUG_PRINT("error", - ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", + ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", pack_length, tab->getFrmLength(), - memcmp(pack_data, tab->getFrmData(), pack_length))); + memcmp(pack_data, tab->getFrmData(), pack_length))); DBUG_DUMP("pack_data", (char*)pack_data, pack_length); DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength()); - error= 3; + error= HA_ERR_TABLE_DEF_CHANGED; invalidating_ndb_table= FALSE; } } @@ -982,7 +1077,7 @@ int ha_ndbcluster::get_metadata(const char *path) m_table= (void *)tab; m_table_info= NULL; // Set in external lock - DBUG_RETURN(build_index_list(ndb, table, ILBP_OPEN)); + DBUG_RETURN(open_indexes(ndb, table, FALSE)); } static int fix_unique_index_attr_order(NDB_INDEX_DATA &data, @@ -1019,97 +1114,272 @@ static int fix_unique_index_attr_order(NDB_INDEX_DATA &data, DBUG_RETURN(0); } -int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase) +int ha_ndbcluster::table_changed(const void *pack_frm_data, uint pack_frm_len) +{ + Ndb *ndb; + NDBDICT *dict; + const NDBTAB *orig_tab; + NdbDictionary::Table new_tab; + int result; + DBUG_ENTER("ha_ndbcluster::table_changed"); + DBUG_PRINT("info", ("Modifying frm for table %s", m_tabname)); + if (check_ndb_connection()) + DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); + + ndb= get_ndb(); + dict= ndb->getDictionary(); + if (!(orig_tab= dict->getTable(m_tabname))) + ERR_RETURN(dict->getNdbError()); + // Check if thread has stale local cache + if (orig_tab->getObjectStatus() == NdbDictionary::Object::Invalid) + { + dict->removeCachedTable(m_tabname); + if (!(orig_tab= dict->getTable(m_tabname))) + 
ERR_RETURN(dict->getNdbError()); + } + new_tab= *orig_tab; + new_tab.setFrm(pack_frm_data, pack_frm_len); + if (dict->alterTable(new_tab) != 0) + ERR_RETURN(dict->getNdbError()); + DBUG_RETURN(0); +} + +/* + Create all the indexes for a table. + If creating any index fails, + the error is returned immediately +*/ +int ha_ndbcluster::create_indexes(Ndb *ndb, TABLE *tab) { uint i; int error= 0; const char *index_name; - char unique_index_name[FN_LEN]; - static const char* unique_suffix= "$unique"; KEY* key_info= tab->key_info; const char **key_name= tab->s->keynames.type_names; NDBDICT *dict= ndb->getDictionary(); - DBUG_ENTER("ha_ndbcluster::build_index_list"); + DBUG_ENTER("ha_ndbcluster::create_indexes"); - // Save information about all known indexes for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) { index_name= *key_name; NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); - m_index[i].type= idx_type; - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + error= create_index(index_name, key_info, idx_type, i); + if (error) { - strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); - DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", - unique_index_name, i)); + DBUG_PRINT("error", ("Failed to create index %u", i)); + break; } - // Create secondary indexes if in create phase - if (phase == ILBP_CREATE) + } + + DBUG_RETURN(error); +} + +void ha_ndbcluster::clear_index(int i) +{ + m_index[i].type= UNDEFINED_INDEX; + m_index[i].status= UNDEFINED; + m_index[i].unique_index= NULL; + m_index[i].index= NULL; + m_index[i].unique_index_attrid_map= NULL; + m_index[i].index_stat=NULL; + m_index[i].index_stat_cache_entries=0; + m_index[i].index_stat_update_freq=0; + m_index[i].index_stat_query_count=0; +} + +void ha_ndbcluster::clear_indexes() +{ + for (int i= 0; i < MAX_KEY; i++) clear_index(i); +} + +/* + Associate a direct reference to an index handle + with an index (for faster access) + */ +int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info, + const char *index_name, uint index_no) +{ + int error= 0; + NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no); + m_index[index_no].type= idx_type; + DBUG_ENTER("ha_ndbcluster::add_index_handle"); + + if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX) + { + DBUG_PRINT("info", ("Get handle to index %s", index_name)); + const NDBINDEX *index= dict->getIndex(index_name, m_tabname); + if (!index) ERR_RETURN(dict->getNdbError()); + m_index[index_no].index= (void *) index; + // ordered index - add stats + NDB_INDEX_DATA& d=m_index[index_no]; + delete d.index_stat; + d.index_stat=NULL; + if (thd->variables.ndb_index_stat_enable) { + d.index_stat=new NdbIndexStat(index); + d.index_stat_cache_entries=thd->variables.ndb_index_stat_cache_entries;
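+ // (the remaining stat parameters likewise mirror their session variables)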
d.index_stat_update_freq=thd->variables.ndb_index_stat_update_freq; + d.index_stat_query_count=0; + d.index_stat->alloc_cache(d.index_stat_cache_entries); + DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u", + index->getName(), + d.index_stat_cache_entries, + d.index_stat_update_freq)); + } else + { + DBUG_PRINT("info", ("index %s stat=off", index->getName())); + } + } + if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + { + char unique_index_name[FN_LEN]; + static const char* unique_suffix= "$unique"; + strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); + DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name)); + const NDBINDEX *index= dict->getIndex(unique_index_name, m_tabname); + if (!index) ERR_RETURN(dict->getNdbError()); + m_index[index_no].unique_index= (void *) index; + error= fix_unique_index_attr_order(m_index[index_no], index, key_info); + } + if (!error) + m_index[index_no].status= ACTIVE; + + DBUG_RETURN(error); +} + +/* + Associate index handles for each index of a table +*/ +int ha_ndbcluster::open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error) +{ + uint i; + int error= 0; + THD *thd=current_thd; + NDBDICT *dict= ndb->getDictionary(); + const char *index_name; + KEY* key_info= tab->key_info; + const char **key_name= tab->s->keynames.type_names; + DBUG_ENTER("ha_ndbcluster::open_indexes"); + + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) + { + if ((error= add_index_handle(thd, dict, key_info, *key_name, i))) + if (ignore_error) + m_index[i].index= m_index[i].unique_index= NULL; + else break; - } - if (error) + } + + DBUG_RETURN(error); +} + +/* + Renumber indexes in index list by shifting out + indexes that are to be dropped + */ +void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab) +{ + uint i; + const char *index_name; + KEY* key_info= tab->key_info; + const char **key_name= tab->s->keynames.type_names; + NDBDICT *dict= ndb->getDictionary(); + DBUG_ENTER("ha_ndbcluster::renumber_indexes"); + + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) + { + index_name= *key_name; + NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); + m_index[i].type= idx_type; + if (m_index[i].status == TO_BE_DROPPED) + { + DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", + index_name, i)); + NDB_INDEX_DATA tmp; + uint j= i + 1; + // Shift index out of list + while(j != MAX_KEY && m_index[j].status != UNDEFINED) { - DBUG_PRINT("error", ("Failed to create index %u", i)); - drop_table(); - break; + tmp= m_index[j - 1]; + m_index[j - 1]= m_index[j]; + m_index[j]= tmp; + j++; } } - // Add handles to index objects - if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX) - { - DBUG_PRINT("info", ("Get handle to index %s", index_name)); - const NDBINDEX *index= dict->getIndex(index_name, m_tabname); - if (!index) DBUG_RETURN(1); - m_index[i].index= (void *) index; - } - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + } + + DBUG_VOID_RETURN; +} + +/* + Drop all indexes that are marked for deletion +*/ +int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab) +{ + uint i; + int error= 0; + const char *index_name; + KEY* key_info= tab->key_info; + const char **key_name= tab->s->keynames.type_names; + NDBDICT *dict= ndb->getDictionary(); + DBUG_ENTER("ha_ndbcluster::drop_indexes"); + + for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) + { + index_name= *key_name; + NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); + m_index[i].type= idx_type; + if 
(m_index[i].status == TO_BE_DROPPED) { - DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name)); - const NDBINDEX *index= dict->getIndex(unique_index_name, m_tabname); - if (!index) DBUG_RETURN(1); - m_index[i].unique_index= (void *) index; - error= fix_unique_index_attr_order(m_index[i], index, key_info); + NdbDictionary::Index *index= + (NdbDictionary::Index *) m_index[i].index; + NdbDictionary::Index *unique_index= + (NdbDictionary::Index *) m_index[i].unique_index; + + if (index) + { + index_name= index->getName(); + DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name)); + // Drop ordered index from ndb + error= drop_ndb_index(index_name); + } + if (!error) + m_index[i].index= NULL; + if (!error && unique_index) + { + index_name= unique_index->getName(); + DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name)); + // Drop unique index from ndb + error= drop_ndb_index(index_name); + } + if (error) + DBUG_RETURN(error); + clear_index(i); + continue; } } DBUG_RETURN(error); } - /* Decode the type of an index from information provided in table object */ NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const { - bool is_hash_index= (table->key_info[inx].algorithm == HA_KEY_ALG_HASH); - if (inx == table->s->primary_key) - return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; + return get_index_type_from_key(inx, table_share->key_info); +} - return ((table->key_info[inx].flags & HA_NOSAME) ? +NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx, + KEY *key_info) const +{ + bool is_hash_index= (key_info[inx].algorithm == + HA_KEY_ALG_HASH); + if (inx == table_share->primary_key) + return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; + + return ((key_info[inx].flags & HA_NOSAME) ? (is_hash_index ?
UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) : ORDERED_INDEX); } @@ -1155,6 +1425,8 @@ void ha_ndbcluster::release_metadata() my_free((char *)m_index[i].unique_index_attrid_map, MYF(0)); m_index[i].unique_index_attrid_map= NULL; } + delete m_index[i].index_stat; + m_index[i].index_stat=NULL; } DBUG_VOID_RETURN; @@ -1164,7 +1436,7 @@ int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type) { if (type >= TL_WRITE_ALLOW_WRITE) return NdbOperation::LM_Exclusive; - else if (uses_blob_value(m_retrieve_all_fields)) + else if (uses_blob_value()) return NdbOperation::LM_Read; else return NdbOperation::LM_CommittedRead; @@ -1226,7 +1498,7 @@ inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part, bool all_parts) const { DBUG_ENTER("ha_ndbcluster::index_flags"); - DBUG_PRINT("info", ("idx_no: %d", idx_no)); + DBUG_PRINT("enter", ("idx_no: %u", idx_no)); DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size); DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | HA_KEY_SCAN_NOT_ROR); @@ -1253,7 +1525,7 @@ static void shrink_varchar(Field* field, const byte* & ptr, char* buf) int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key) { - KEY* key_info= table->key_info + table->s->primary_key; + KEY* key_info= table->key_info + table_share->primary_key; KEY_PART_INFO* key_part= key_info->key_part; KEY_PART_INFO* end= key_part+key_info->key_parts; DBUG_ENTER("set_primary_key"); @@ -1275,7 +1547,7 @@ int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key) int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const byte *record) { - KEY* key_info= table->key_info + table->s->primary_key; + KEY* key_info= table->key_info + table_share->primary_key; KEY_PART_INFO* key_part= key_info->key_part; KEY_PART_INFO* end= key_part+key_info->key_parts; DBUG_ENTER("set_primary_key_from_record"); @@ -1317,17 +1589,14 @@ inline int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) { uint i; - THD *thd= current_thd; - DBUG_ENTER("define_read_attrs"); // Define attributes to read - for (i= 0; i < table->s->fields; i++) + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; - if ((thd->query_id == field->query_id) || - ((field->flags & PRI_KEY_FLAG)) || - m_retrieve_all_fields) + if (ha_get_bit_in_read_set(i+1) || + ((field->flags & PRI_KEY_FLAG))) { if (get_ndb_value(op, field, i, buf)) ERR_RETURN(op->getNdbError()); @@ -1338,11 +1607,11 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) } } - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { DBUG_PRINT("info", ("Getting hidden key")); // Scanning table with no primary key - int hidden_no= table->s->fields; + int hidden_no= table_share->fields; #ifndef DBUG_OFF const NDBTAB *tab= (const NDBTAB *) m_table; if (!tab->getColumn(hidden_no)) @@ -1354,13 +1623,15 @@ int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op) DBUG_RETURN(0); } + /* Read one record from NDB using primary key */ -int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) +int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf, + uint32 part_id) { - uint no_fields= table->s->fields; + uint no_fields= table_share->fields; NdbConnection *trans= m_active_trans; NdbOperation *op; @@ -1368,6 +1639,7 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) DBUG_ENTER("pk_read"); DBUG_PRINT("enter", ("key_len: %u", key_len)); DBUG_DUMP("key", (char*)key, key_len); + m_write_op= FALSE; NdbOperation::LockMode lm= 
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1375,7 +1647,9 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (m_use_partition_function) + op->setPartitionId(part_id); + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); @@ -1410,34 +1684,50 @@ int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) /* Read one complementing record from NDB using primary key from old_data + or hidden key */ -int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) +int ha_ndbcluster::complemented_read(const byte *old_data, byte *new_data, + uint32 old_part_id) { - uint no_fields= table->s->fields, i; + uint no_fields= table_share->fields, i; NdbTransaction *trans= m_active_trans; NdbOperation *op; - THD *thd= current_thd; - DBUG_ENTER("complemented_pk_read"); + DBUG_ENTER("complemented_read"); + m_write_op= FALSE; - if (m_retrieve_all_fields) + if (ha_get_all_bit_in_read_set()) + { // We have already retrieved all fields, nothing to complement DBUG_RETURN(0); + } NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || op->readTuple(lm) != 0) ERR_RETURN(trans->getNdbError()); - int res; - if ((res= set_primary_key_from_record(op, old_data))) - ERR_RETURN(trans->getNdbError()); + if (table_share->primary_key != MAX_KEY) + { + if (set_primary_key_from_record(op, old_data)) + ERR_RETURN(trans->getNdbError()); + } + else + { + // This table has no primary key, use "hidden" primary key + if (set_hidden_key(op, table->s->fields, m_ref)) + ERR_RETURN(op->getNdbError()); + } + + if (m_use_partition_function) + op->setPartitionId(old_part_id); + + // Read all unreferenced non-key field(s) for (i= 0; i < no_fields; i++) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { if (get_ndb_value(op, field, i, new_data)) ERR_RETURN(trans->getNdbError()); @@ -1461,7 +1751,7 @@ int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data) { Field *field= table->field[i]; if (!((field->flags & PRI_KEY_FLAG) || - (thd->query_id == field->query_id))) + (ha_get_bit_in_read_set(i+1)))) { m_value[i].ptr= NULL; } @@ -1490,6 +1780,19 @@ int ha_ndbcluster::peek_row(const byte *record) if ((res= set_primary_key_from_record(op, record))) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + { + uint32 part_id; + int error; + longlong func_value; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id, + &func_value))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + if (execute_no_commit_ie(this,trans) != 0) { table->status= STATUS_NOT_FOUND; @@ -1649,10 +1952,12 @@ inline int ha_ndbcluster::next_result(byte *buf) */ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, + uint inx, + bool rir, const key_range *keys[2], uint range_no) { - const KEY *const key_info= table->key_info + active_index; + const KEY *const key_info= table->key_info + inx; const uint key_parts= key_info->key_parts; uint key_tot_len[2]; uint tot_len; @@ -1717,7 +2022,10 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, switch (p.key->flag) { case HA_READ_KEY_EXACT: - p.bound_type= NdbIndexScanOperation::BoundEQ; + if (!
rir) + p.bound_type= NdbIndexScanOperation::BoundEQ; + else // differs for records_in_range + p.bound_type= NdbIndexScanOperation::BoundLE; break; // ascending case HA_READ_KEY_OR_NEXT: @@ -1828,7 +2136,8 @@ int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, int ha_ndbcluster::ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf) + bool sorted, bool descending, + byte* buf, part_id_range *part_spec) { int res; bool restart; @@ -1839,6 +2148,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d", active_index, sorted, descending)); DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname)); + m_write_op= FALSE; // Check that sorted seems to be initialised DBUG_ASSERT(sorted == 0 || sorted == 1); @@ -1853,11 +2163,17 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, (const NDBTAB *) m_table)) || op->readTuples(lm, 0, parallelism, sorted, descending)) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); m_active_cursor= op; } else { restart= TRUE; op= (NdbIndexScanOperation*)m_active_cursor; + if (m_use_partition_function && part_spec != NULL && + part_spec->start_part == part_spec->end_part) + op->setPartitionId(part_spec->start_part); DBUG_ASSERT(op->getSorted() == sorted); DBUG_ASSERT(op->getLockMode() == (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); @@ -1867,7 +2183,7 @@ int ha_ndbcluster::ordered_index_scan(const key_range *start_key, { const key_range *keys[2]= { start_key, end_key }; - res= set_bounds(op, keys); + res= set_bounds(op, active_index, false, keys); if (res) DBUG_RETURN(res); } @@ -1895,9 +2211,11 @@ int ha_ndbcluster::full_table_scan(byte *buf) int res; NdbScanOperation *op; NdbTransaction *trans= m_active_trans; + part_id_range part_spec; DBUG_ENTER("full_table_scan"); DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); + m_write_op= FALSE; NdbOperation::LockMode lm= (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); @@ -1905,6 +2223,35 @@ int ha_ndbcluster::full_table_scan(byte *buf) op->readTuples(lm, 0, parallelism)) ERR_RETURN(trans->getNdbError()); m_active_cursor= op; + + if (m_use_partition_function) + { + part_spec.start_part= 0; + part_spec.end_part= m_part_info->get_tot_partitions() - 1; + prune_partition_set(table, &part_spec); + DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can return HA_ERR_END_OF_FILE + If partition pruning has found exactly one partition in set + we can optimize scan to run towards that partition only. + */ + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. 
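+ (For example: with a table partitioned by KEY(a), a scan whose condition pins 'a' to one value prunes to a single partition, and the setPartitionId() call below then confines the scan to that one fragment.)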
+ */ + m_active_cursor->setPartitionId(part_spec.start_part); + } + } + if (generate_scan_filter(m_cond_stack, op)) DBUG_RETURN(ndb_err(trans)); if ((res= define_read_attrs(buf, op))) @@ -1927,16 +2274,17 @@ int ha_ndbcluster::write_row(byte *record) NdbOperation *op; int res; THD *thd= current_thd; + longlong func_value= 0; + DBUG_ENTER("ha_ndbcluster::write_row"); - DBUG_ENTER("write_row"); - - if (m_ignore_dup_key && table->s->primary_key != MAX_KEY) + m_write_op= TRUE; + if (!m_use_write && m_ignore_dup_key && table_share->primary_key != MAX_KEY) { int peek_res= peek_row(record); if (!peek_res) { - m_dupkey= table->s->primary_key; + m_dupkey= table_share->primary_key; DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY); } if (peek_res != HA_ERR_KEY_NOT_FOUND) @@ -1955,7 +2303,19 @@ int ha_ndbcluster::write_row(byte *record) if (res != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (m_use_partition_function) + { + uint32 part_id; + int error; + if ((error= m_part_info->get_partition_id(m_part_info, &part_id, + &func_value))) + { + DBUG_RETURN(error); + } + op->setPartitionId(part_id); + } + + if (table_share->primary_key == MAX_KEY) { // Table has hidden primary key Ndb *ndb= get_ndb(); @@ -1968,7 +2328,7 @@ int ha_ndbcluster::write_row(byte *record) ndb->getNdbError().status == NdbError::TemporaryError); if (auto_value == NDB_FAILED_AUTO_INCREMENT) ERR_RETURN(ndb->getNdbError()); - if (set_hidden_key(op, table->s->fields, (const byte*)&auto_value)) + if (set_hidden_key(op, table_share->fields, (const byte*)&auto_value)) ERR_RETURN(op->getNdbError()); } else @@ -1992,17 +2352,34 @@ int ha_ndbcluster::write_row(byte *record) // Set non-key attribute(s) bool set_blob_value= FALSE; - for (i= 0; i < table->s->fields; i++) + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; if (!(field->flags & PRI_KEY_FLAG) && - set_ndb_value(op, field, i, &set_blob_value)) + (ha_get_bit_in_write_set(i + 1) || !m_use_write) && + set_ndb_value(op, field, i, record-table->record[0], &set_blob_value)) { m_skip_auto_increment= TRUE; ERR_RETURN(op->getNdbError()); } } + if (m_use_partition_function) + { + /* + We need to set the value of the partition function in + NDB, since the NDB kernel doesn't have easy access to the function + needed to calculate the value.
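+ (The value is written into the hidden extra column that follows the user-visible fields -- see the op->setValue(no_fields, part_func_value) call below.)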
+ */ + if (func_value >= INT_MAX32) + func_value= INT_MAX32; + uint32 part_func_value= (uint32)func_value; + uint no_fields= table_share->fields; + if (table_share->primary_key == MAX_KEY) + no_fields++; + op->setValue(no_fields, part_func_value); + } + m_rows_changed++; /* @@ -2112,28 +2489,44 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; uint i; + uint32 old_part_id= 0, new_part_id= 0; + int error; + longlong func_value; DBUG_ENTER("update_row"); + m_write_op= TRUE; statistic_increment(thd->status_var.ha_update_count, &LOCK_status); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { table->timestamp_field->set_time(); - // Set query_id so that field is really updated - table->timestamp_field->query_id= thd->query_id; + ha_set_bit_in_write_set(table->timestamp_field->fieldnr); } - /* Check for update of primary key for special handling */ - if ((table->s->primary_key != MAX_KEY) && - (key_cmp(table->s->primary_key, old_data, new_data))) + if (m_use_partition_function && + (error= get_parts_for_update(old_data, new_data, table->record[0], + m_part_info, &old_part_id, &new_part_id, + &func_value))) + { + DBUG_RETURN(error); + } + + /* + * Check for update of primary key or partition change + * for special handling + */ + if (((table_share->primary_key != MAX_KEY) && + key_cmp(table_share->primary_key, old_data, new_data)) || + (old_part_id != new_part_id)) { int read_res, insert_res, delete_res, undo_res; - DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert")); + DBUG_PRINT("info", ("primary key update or partition change, " + "doing read+delete+insert")); // Get all old fields, since we optimize away fields not in query - read_res= complemented_pk_read(old_data, new_data); + read_res= complemented_read(old_data, new_data, old_part_id); if (read_res) { - DBUG_PRINT("info", ("pk read failed")); + DBUG_PRINT("info", ("read failed")); DBUG_RETURN(read_res); } // Delete old row @@ -2184,8 +2577,10 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) if (!(op= cursor->updateCurrentTuple())) ERR_RETURN(trans->getNdbError()); m_ops_pending++; - if (uses_blob_value(FALSE)) + if (uses_blob_value()) m_blobs_pending= TRUE; + if (m_use_partition_function) + cursor->setPartitionId(new_part_id); } else { @@ -2193,7 +2588,9 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) op->updateTuple() != 0) ERR_RETURN(trans->getNdbError()); - if (table->s->primary_key == MAX_KEY) + if (m_use_partition_function) + op->setPartitionId(new_part_id); + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); @@ -2216,15 +2613,25 @@ int ha_ndbcluster::update_row(const byte *old_data, byte *new_data) m_rows_changed++; // Set non-key attribute(s) - for (i= 0; i < table->s->fields; i++) + for (i= 0; i < table_share->fields; i++) { Field *field= table->field[i]; - if (((thd->query_id == field->query_id) || m_retrieve_all_fields) && + if (ha_get_bit_in_write_set(i+1) && (!(field->flags & PRI_KEY_FLAG)) && - set_ndb_value(op, field, i)) + set_ndb_value(op, field, i, new_data - table->record[0])) ERR_RETURN(op->getNdbError()); } + if (m_use_partition_function) + { + if (func_value >= INT_MAX32) + func_value= INT_MAX32; + uint32 part_func_value= (uint32)func_value; + uint no_fields= table_share->fields; + if (table_share->primary_key == MAX_KEY) + no_fields++; + op->setValue(no_fields, 
part_func_value); + } // Execute update operation if (!cursor && execute_no_commit(this,trans) != 0) { no_uncommitted_rows_execute_failure(); @@ -2245,11 +2652,21 @@ int ha_ndbcluster::delete_row(const byte *record) NdbTransaction *trans= m_active_trans; NdbScanOperation* cursor= m_active_cursor; NdbOperation *op; + uint32 part_id; + int error; DBUG_ENTER("delete_row"); + m_write_op= TRUE; statistic_increment(thd->status_var.ha_delete_count,&LOCK_status); m_rows_changed++; + if (m_use_partition_function && + (error= get_part_for_delete(record, table->record[0], m_part_info, + &part_id))) + { + DBUG_RETURN(error); + } + if (cursor) { /* @@ -2264,6 +2681,9 @@ int ha_ndbcluster::delete_row(const byte *record) ERR_RETURN(trans->getNdbError()); m_ops_pending++; + if (m_use_partition_function) + cursor->setPartitionId(part_id); + no_uncommitted_rows_update(-1); if (!m_primary_key_update) @@ -2277,9 +2697,12 @@ int ha_ndbcluster::delete_row(const byte *record) op->deleteTuple() != 0) ERR_RETURN(trans->getNdbError()); + if (m_use_partition_function) + op->setPartitionId(part_id); + no_uncommitted_rows_update(-1); - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { // This table has no primary key, use "hidden" primary key DBUG_PRINT("info", ("Using hidden key")); @@ -2317,83 +2740,128 @@ int ha_ndbcluster::delete_row(const byte *record) set to null. */ -void ha_ndbcluster::unpack_record(byte* buf) +void ndb_unpack_record(TABLE *table, NdbValue *value, + MY_BITMAP *defined, byte *buf) { + Field **p_field= table->field, *field= *p_field; uint row_offset= (uint) (buf - table->record[0]); - Field **field, **end; - NdbValue *value= m_value; - DBUG_ENTER("unpack_record"); + DBUG_ENTER("ndb_unpack_record"); - end= table->field + table->s->fields; - // Set null flag(s) bzero(buf, table->s->null_bytes); - for (field= table->field; - field < end; - field++, value++) + for ( ; field; + p_field++, value++, field= *p_field) { if ((*value).ptr) { - if (! 
((*field)->flags & BLOB_FLAG)) + if (!(field->flags & BLOB_FLAG)) { - if ((*value).rec->isNULL()) - (*field)->set_null(row_offset); - else if ((*field)->type() == MYSQL_TYPE_BIT) + int is_null= (*value).rec->isNULL(); + if (is_null) { - uint pack_len= (*field)->pack_length(); - if (pack_len < 5) + if (is_null > 0) + { + DBUG_PRINT("info",("[%u] NULL", + (*value).rec->getColumn()->getColumnNo())); + field->set_null(row_offset); + } + else + { + DBUG_PRINT("info",("[%u] UNDEFINED", + (*value).rec->getColumn()->getColumnNo())); + bitmap_clear_bit(defined, + (*value).rec->getColumn()->getColumnNo()); + } + } + else if (field->type() == MYSQL_TYPE_BIT) + { + byte *save_field_ptr= field->ptr; + field->ptr= save_field_ptr + row_offset; + if (field->pack_length() < 5) { DBUG_PRINT("info", ("bit field H'%.8X", (*value).rec->u_32_value())); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_32_value(), - FALSE); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_32_value(), FALSE); } else { DBUG_PRINT("info", ("bit field H'%.8X%.8X", - *(Uint32 *)(*value).rec->aRef(), - *((Uint32 *)(*value).rec->aRef()+1))); - ((Field_bit *) *field)->store((longlong) - (*value).rec->u_64_value(), TRUE); + *(Uint32*) (*value).rec->aRef(), + *((Uint32*) (*value).rec->aRef()+1))); + ((Field_bit*) field)->store((longlong) + (*value).rec->u_64_value(),TRUE); } + field->ptr= save_field_ptr; + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); + } + else + { + DBUG_PRINT("info",("[%u] SET", + (*value).rec->getColumn()->getColumnNo())); + DBUG_DUMP("info", (const char*) field->ptr, field->field_length); } } else { - NdbBlob* ndb_blob= (*value).blob; - bool isNull= TRUE; + NdbBlob *ndb_blob= (*value).blob; + uint col_no = ndb_blob->getColumn()->getColumnNo(); + int isNull; + ndb_blob->getDefined(isNull); + if (isNull == 1) + { + DBUG_PRINT("info",("[%u] NULL", col_no)); + field->set_null(row_offset); + } + else if (isNull == -1) + { + DBUG_PRINT("info",("[%u] UNDEFINED", col_no)); + bitmap_clear_bit(defined, col_no); + } + else + { #ifndef DBUG_OFF - int ret= + // pointer was set in get_ndb_blobs_value + Field_blob *field_blob= (Field_blob*)field; + char* ptr; + field_blob->get_ptr(&ptr, row_offset); + uint32 len= field_blob->get_length(row_offset); + DBUG_PRINT("info",("[%u] SET ptr=%p len=%u", col_no, ptr, len)); #endif - ndb_blob->getNull(isNull); - DBUG_ASSERT(ret == 0); - if (isNull) - (*field)->set_null(row_offset); + } } } } - + DBUG_VOID_RETURN; +} + +void ha_ndbcluster::unpack_record(byte *buf) +{ + ndb_unpack_record(table, m_value, 0, buf); #ifndef DBUG_OFF // Read and print all values that was fetched - if (table->s->primary_key == MAX_KEY) + if (table_share->primary_key == MAX_KEY) { // Table with hidden primary key - int hidden_no= table->s->fields; + int hidden_no= table_share->fields; const NDBTAB *tab= (const NDBTAB *) m_table; const NDBCOL *hidden_col= tab->getColumn(hidden_no); const NdbRecAttr* rec= m_value[hidden_no].rec; DBUG_ASSERT(rec); - DBUG_PRINT("hidden", ("%d: %s \"%llu\"", hidden_no, + DBUG_PRINT("hidden", ("%d: %s \"%llu\"", hidden_no, hidden_col->getName(), rec->u_64_value())); - } - //print_results(); + } + //DBUG_EXECUTE("value", print_results();); #endif - DBUG_VOID_RETURN; } /* Utility function to print/dump the fetched field + to avoid unnecessary work, wrap in DBUG_EXECUTE as in: + + DBUG_EXECUTE("value", print_results();); */ void ha_ndbcluster::print_results() @@ -2401,15
+2869,11 @@ void ha_ndbcluster::print_results() DBUG_ENTER("print_results"); #ifndef DBUG_OFF - const NDBTAB *tab= (const NDBTAB*) m_table; - - if (!_db_on_) - DBUG_VOID_RETURN; char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH]; String type(buf_type, sizeof(buf_type), &my_charset_bin); String val(buf_val, sizeof(buf_val), &my_charset_bin); - for (uint f= 0; f < table->s->fields; f++) + for (uint f= 0; f < table_share->fields; f++) { /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */ char buf[2000]; @@ -2457,11 +2921,13 @@ print_value: } -int ha_ndbcluster::index_init(uint index) +int ha_ndbcluster::index_init(uint index, bool sorted) { DBUG_ENTER("ha_ndbcluster::index_init"); - DBUG_PRINT("enter", ("index: %u", index)); - DBUG_RETURN(handler::index_init(index)); + DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted)); + active_index= index; + m_sorted= sorted; + DBUG_RETURN(0); } @@ -2498,55 +2964,16 @@ int ha_ndbcluster::index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag) { + key_range start_key; + bool descending= FALSE; DBUG_ENTER("ha_ndbcluster::index_read"); DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", active_index, key_len, find_flag)); - int error; - ndb_index_type type= get_index_type(active_index); - const KEY* key_info= table->key_info+active_index; - switch (type){ - case PRIMARY_KEY_ORDERED_INDEX: - case PRIMARY_KEY_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(pk_read(key, key_len, buf)); - } - else if (type == PRIMARY_KEY_INDEX) - { - DBUG_RETURN(1); - } - break; - case UNIQUE_ORDERED_INDEX: - case UNIQUE_INDEX: - if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len && - !check_null_in_key(key_info, key, key_len)) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - DBUG_RETURN(unique_index_read(key, key_len, buf)); - } - else if (type == UNIQUE_INDEX) - { - DBUG_RETURN(1); - } - break; - case ORDERED_INDEX: - break; - default: - case UNDEFINED_INDEX: - DBUG_ASSERT(FALSE); - DBUG_RETURN(1); - break; - } - - key_range start_key; start_key.key= key; start_key.length= key_len; start_key.flag= find_flag; - bool descending= FALSE; + descending= FALSE; switch (find_flag) { case HA_READ_KEY_OR_PREV: case HA_READ_BEFORE_KEY: @@ -2557,8 +2984,8 @@ int ha_ndbcluster::index_read(byte *buf, default: break; } - error= ordered_index_scan(&start_key, 0, TRUE, descending, buf); - DBUG_RETURN(error == HA_ERR_END_OF_FILE ? 
HA_ERR_KEY_NOT_FOUND : error); + DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending, + m_sorted, buf)); } @@ -2569,7 +2996,8 @@ int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status); DBUG_ENTER("ha_ndbcluster::index_read_idx"); DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len)); - index_init(index_no); + close_scan(); + index_init(index_no, 0); DBUG_RETURN(index_read(buf, key, key_len, find_flag)); } @@ -2600,7 +3028,7 @@ int ha_ndbcluster::index_first(byte *buf) // Start the ordered index scan and fetch the first row // Only HA_READ_ORDER indexes get called by index_first - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL)); } @@ -2608,7 +3036,7 @@ int ha_ndbcluster::index_last(byte *buf) { DBUG_ENTER("ha_ndbcluster::index_last"); statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status); - DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf)); + DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL)); } int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) @@ -2617,66 +3045,85 @@ int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len) DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST)); } -inline int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key, const key_range *end_key, - bool eq_r, bool sorted, + bool desc, bool sorted, byte* buf) { - KEY* key_info; - int error= 1; + part_id_range part_spec; + ndb_index_type type= get_index_type(active_index); + const KEY* key_info= table->key_info+active_index; + int error; DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf"); - DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted)); + DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted)); + + if (m_use_partition_function) + { + get_partition_set(table, buf, active_index, start_key, &part_spec); + DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u", + part_spec.start_part, part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can return HA_ERR_END_OF_FILE + If partition pruning has found exactly one partition in set + we can optimize scan to run towards that partition only. + */ + if (part_spec.start_part > part_spec.end_part) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + else if (part_spec.start_part == part_spec.end_part) + { + /* + Only one partition is required to scan, if sorted is required we + don't need it any more since output from one ordered partitioned + index is always sorted. + */ + sorted= FALSE; + } + } - switch (get_index_type(active_index)){ + m_write_op= FALSE; + switch (type){ case PRIMARY_KEY_ORDERED_INDEX: case PRIMARY_KEY_INDEX: - key_info= table->key_info + active_index; if (start_key && start_key->length == key_info->key_length && start_key->flag == HA_READ_KEY_EXACT) { if (m_active_cursor && (error= close_scan())) DBUG_RETURN(error); - error= pk_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? 
HA_ERR_END_OF_FILE : error); + DBUG_RETURN(unique_index_read(start_key->key, start_key->length, buf)); } break; default: break; } - // Start the ordered index scan and fetch the first row - error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf); - DBUG_RETURN(error); + DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf, + &part_spec)); } - int ha_ndbcluster::read_range_first(const key_range *start_key, const key_range *end_key, bool eq_r, bool sorted) { byte* buf= table->record[0]; DBUG_ENTER("ha_ndbcluster::read_range_first"); - - DBUG_RETURN(read_range_first_to_buf(start_key, - end_key, - eq_r, - sorted, - buf)); + DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE, + sorted, buf)); } int ha_ndbcluster::read_range_next() @@ -2702,7 +3149,7 @@ int ha_ndbcluster::rnd_init(bool scan) DBUG_RETURN(-1); } } - index_init(table->s->primary_key); + index_init(table_share->primary_key, 0); DBUG_RETURN(0); } @@ -2769,7 +3216,20 @@ int ha_ndbcluster::rnd_pos(byte *buf, byte *pos) &LOCK_status); // The primary key for the record is stored in pos // Perform a pk_read using primary key "index" - DBUG_RETURN(pk_read(pos, ref_length, buf)); + { + part_id_range part_spec; + if (m_use_partition_function) + { + key_range key_spec; + KEY *key_info= table->key_info + active_index; + key_spec.key= pos; + key_spec.length= ref_length; + key_spec.flag= HA_READ_KEY_EXACT; + get_full_part_id_from_key(table, buf, key_info, &key_spec, &part_spec); + DBUG_ASSERT(part_spec.start_part == part_spec.end_part); + } + DBUG_RETURN(pk_read(pos, ref_length, buf, part_spec.start_part)); + } } @@ -2787,9 +3247,9 @@ void ha_ndbcluster::position(const byte *record) byte *buff; DBUG_ENTER("position"); - if (table->s->primary_key != MAX_KEY) + if (table_share->primary_key != MAX_KEY) { - key_info= table->key_info + table->s->primary_key; + key_info= table->key_info + table_share->primary_key; key_part= key_info->key_part; end= key_part + key_info->key_parts; buff= ref; @@ -2913,86 +3373,34 @@ void ha_ndbcluster::info(uint flag) } +void ha_ndbcluster::get_dynamic_partition_info(PARTITION_INFO *stat_info, + uint part_id) +{ + /* + This function should be fixed. Suggested fix: + implement an ndb function which retrieves the statistics + about ndb partitions.
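+ Until such a function exists, callers only ever see zeroed statistics (the bzero() call below).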
+ */ + bzero((char*) stat_info, sizeof(PARTITION_INFO)); + return; +} + + int ha_ndbcluster::extra(enum ha_extra_function operation) { DBUG_ENTER("extra"); switch (operation) { - case HA_EXTRA_NORMAL: /* Optimize for space (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NORMAL")); - break; - case HA_EXTRA_QUICK: /* Optimize for speed */ - DBUG_PRINT("info", ("HA_EXTRA_QUICK")); - break; case HA_EXTRA_RESET: /* Reset database to after open */ DBUG_PRINT("info", ("HA_EXTRA_RESET")); DBUG_PRINT("info", ("Clearing condition stack")); cond_clear(); - break; - case HA_EXTRA_CACHE: /* Cash record in HA_rrnd() */ - DBUG_PRINT("info", ("HA_EXTRA_CACHE")); - break; - case HA_EXTRA_NO_CACHE: /* End cacheing of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE")); - break; - case HA_EXTRA_NO_READCHECK: /* No readcheck on update */ - DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK")); - break; - case HA_EXTRA_READCHECK: /* Use readcheck (def) */ - DBUG_PRINT("info", ("HA_EXTRA_READCHECK")); - break; - case HA_EXTRA_KEYREAD: /* Read only key to database */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD")); - break; - case HA_EXTRA_NO_KEYREAD: /* Normal read of records (def) */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD")); - break; - case HA_EXTRA_NO_USER_CHANGE: /* No user is allowed to write */ - DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE")); - break; - case HA_EXTRA_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE")); - break; - case HA_EXTRA_NO_KEY_CACHE: - DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE")); - break; - case HA_EXTRA_WAIT_LOCK: /* Wait until file is avalably (def) */ - DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK")); - break; - case HA_EXTRA_NO_WAIT_LOCK: /* If file is locked, return quickly */ - DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK")); - break; - case HA_EXTRA_WRITE_CACHE: /* Use write cache in ha_write() */ - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE")); - break; - case HA_EXTRA_FLUSH_CACHE: /* flush write_record_cache */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE")); - break; - case HA_EXTRA_NO_KEYS: /* Remove all update of keys */ - DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS")); - break; - case HA_EXTRA_KEYREAD_CHANGE_POS: /* Keyread, but change pos */ - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */ - break; - case HA_EXTRA_REMEMBER_POS: /* Remember pos for next/prev */ - DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS")); - break; - case HA_EXTRA_RESTORE_POS: - DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS")); - break; - case HA_EXTRA_REINIT_CACHE: /* init cache from current record */ - DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE")); - break; - case HA_EXTRA_FORCE_REOPEN: /* Datafile have changed on disk */ - DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN")); - break; - case HA_EXTRA_FLUSH: /* Flush tables to disk */ - DBUG_PRINT("info", ("HA_EXTRA_FLUSH")); - break; - case HA_EXTRA_NO_ROWS: /* Don't write rows */ - DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS")); - break; - case HA_EXTRA_RESET_STATE: /* Reset positions */ - DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE")); + /* + * Regular partition pruning will set the bitmap appropriately. 
+ * Some queries like ALTER TABLE don't use partition pruning and + * thus the 'used_partitions' bitmap needs to be initialized + */ + if (m_part_info) + bitmap_set_all(&m_part_info->used_partitions); break; case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); @@ -3012,34 +3420,18 @@ int ha_ndbcluster::extra(enum ha_extra_function operation) m_use_write= FALSE; m_ignore_dup_key= FALSE; break; - case HA_EXTRA_RETRIEVE_ALL_COLS: /* Retrieve all columns, not just those - where field->query_id is the same as - the current query id */ - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS")); - m_retrieve_all_fields= TRUE; + case HA_EXTRA_IGNORE_NO_KEY: + DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY")); + DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit")); + m_ignore_no_key= TRUE; break; - case HA_EXTRA_PREPARE_FOR_DELETE: - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE")); + case HA_EXTRA_NO_IGNORE_NO_KEY: + DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY")); + DBUG_PRINT("info", ("Turning off AO_IgnoreError at Commit/NoCommit")); + m_ignore_no_key= FALSE; break; - case HA_EXTRA_PREPARE_FOR_UPDATE: /* Remove read cache if problems */ - DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE")); - break; - case HA_EXTRA_PRELOAD_BUFFER_SIZE: - DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE")); - break; - case HA_EXTRA_RETRIEVE_PRIMARY_KEY: - DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY")); - m_retrieve_primary_key= TRUE; - break; - case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE")); - break; - case HA_EXTRA_CHANGE_KEY_TO_DUP: - DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP")); - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS")); + default: break; - } DBUG_RETURN(0); @@ -3257,8 +3649,9 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) Thd_ndb *thd_ndb= get_thd_ndb(thd); Ndb *ndb= thd_ndb->ndb; - DBUG_PRINT("enter", ("thd: %x, thd_ndb: %x, thd_ndb->lock_count: %d", - thd, thd_ndb, thd_ndb->lock_count)); + DBUG_PRINT("enter", ("this: %x thd: %lx thd_ndb: %lx " + "thd_ndb->lock_count: %d", + this, thd, thd_ndb, thd_ndb->lock_count)); if (lock_type != F_UNLCK) { @@ -3335,8 +3728,6 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) DBUG_ASSERT(m_active_trans); // Start of transaction m_rows_changed= 0; - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; { NDBDICT *dict= ndb->getDictionary(); @@ -3370,7 +3761,7 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type) { m_table= (void *)tab; m_table_version = tab->getObjectVersion(); - if (!(my_errno= build_index_list(ndb, table, ILBP_OPEN))) + if (!(my_errno= open_indexes(ndb, table, FALSE))) DBUG_RETURN(my_errno); } m_table_info= tab_info; @@ -3474,8 +3865,6 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) m_active_trans= trans; // Start of statement - m_retrieve_all_fields= FALSE; - m_retrieve_primary_key= FALSE; m_ops_pending= 0; DBUG_RETURN(error); @@ -3486,7 +3875,7 @@ int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) Commit a transaction started in NDB */ -int ndbcluster_commit(THD *thd, bool all) +static int ndbcluster_commit(THD *thd, bool all) { int res= 0; Thd_ndb *thd_ndb= get_thd_ndb(thd); @@ -3521,7 +3910,8 @@ int ndbcluster_commit(THD *thd, bool all) while ((share= it++)) { pthread_mutex_lock(&share->mutex); - DBUG_PRINT("info", ("Invalidate commit_count for %s,
share->commit_count: %d ", share->table_name, share->commit_count)); + DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", + share->key, share->commit_count)); share->commit_count= 0; share->commit_count_lock++; pthread_mutex_unlock(&share->mutex); @@ -3536,7 +3926,7 @@ int ndbcluster_commit(THD *thd, bool all) Rollback a transaction started in NDB */ -int ndbcluster_rollback(THD *thd, bool all) +static int ndbcluster_rollback(THD *thd, bool all) { int res= 0; Thd_ndb *thd_ndb= get_thd_ndb(thd); @@ -3852,53 +4242,7 @@ static int create_ndb_column(NDBCOL &col, /* Create a table in NDB Cluster - */ - -static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length) -{ - if (form->s->max_rows == (ha_rows) 0) /* default setting, don't set fragmentation */ - return; - /** - * get the number of fragments right - */ - uint no_fragments; - { -#if MYSQL_VERSION_ID >= 50000 - uint acc_row_size= 25 + /*safety margin*/ 2; -#else - uint acc_row_size= pk_length*4; - /* add acc overhead */ - if (pk_length <= 8) /* main page will set the limit */ - acc_row_size+= 25 + /*safety margin*/ 2; - else /* overflow page will set the limit */ - acc_row_size+= 4 + /*safety margin*/ 4; -#endif - ulonglong acc_fragment_size= 512*1024*1024; - ulonglong max_rows= form->s->max_rows; -#if MYSQL_VERSION_ID >= 50100 - no_fragments= (max_rows*acc_row_size)/acc_fragment_size+1; -#else - no_fragments= ((max_rows*acc_row_size)/acc_fragment_size+1 - +1/*correct rounding*/)/2; -#endif - } - { - uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); - NDBTAB::FragmentType ftype; - if (no_fragments > 2*no_nodes) - { - ftype= NDBTAB::FragAllLarge; - if (no_fragments > 4*no_nodes) - push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Ndb might have problems storing the max amount of rows specified"); - } - else if (no_fragments > no_nodes) - ftype= NDBTAB::FragAllMedium; - else - ftype= NDBTAB::FragAllSmall; - tab.setFragmentType(ftype); - } -} +*/ int ha_ndbcluster::create(const char *name, TABLE *form, @@ -3908,23 +4252,29 @@ int ha_ndbcluster::create(const char *name, NDBCOL col; uint pack_length, length, i, pk_length= 0; const void *data, *pack_data; - char name2[FN_HEADLEN]; bool create_from_engine= (info->table_options & HA_OPTION_CREATE_FROM_ENGINE); DBUG_ENTER("ha_ndbcluster::create"); DBUG_PRINT("enter", ("name: %s", name)); - fn_format(name2, name, "", "",2); // Remove the .frm extension - set_dbname(name2); - set_tabname(name2); + DBUG_ASSERT(*fn_rext((char*)name) == 0); + set_dbname(name); + set_tabname(name); + + table= form; if (create_from_engine) { /* - Table alreay exists in NDB and frm file has been created by + Table already exists in NDB and frm file has been created by caller. 
Do Ndb specific stuff, such as create a .ndb file */ - my_errno= write_ndb_file(); + if ((my_errno= write_ndb_file(name))) + DBUG_RETURN(my_errno); +#ifdef HAVE_NDB_BINLOG + ndbcluster_create_binlog_setup(get_ndb(), name, strlen(name), + m_dbname, m_tabname, FALSE); +#endif /* HAVE_NDB_BINLOG */ DBUG_RETURN(my_errno); } @@ -3938,7 +4288,7 @@ int ha_ndbcluster::create(const char *name, if (packfrm(data, length, &pack_data, &pack_length)) DBUG_RETURN(2); - DBUG_PRINT("info", ("setFrm data=%x, len=%d", pack_data, pack_length)); + DBUG_PRINT("info", ("setFrm data=%lx len=%d", pack_data, pack_length)); tab.setFrm(pack_data, pack_length); my_free((char*)data, MYF(0)); my_free((char*)pack_data, MYF(0)); @@ -3951,11 +4301,32 @@ int ha_ndbcluster::create(const char *name, field->pack_length())); if ((my_errno= create_ndb_column(col, field, info))) DBUG_RETURN(my_errno); + + if (info->store_on_disk || getenv("NDB_DEFAULT_DISK")) + col.setStorageType(NdbDictionary::Column::StorageTypeDisk); + else + col.setStorageType(NdbDictionary::Column::StorageTypeMemory); + tab.addColumn(col); if (col.getPrimaryKey()) pk_length += (field->pack_length() + 3) / 4; } - + + KEY* key_info; + for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++) + { + KEY_PART_INFO *key_part= key_info->key_part; + KEY_PART_INFO *end= key_part + key_info->key_parts; + for (; key_part != end; key_part++) + tab.getColumn(key_part->fieldnr-1)->setStorageType( + NdbDictionary::Column::StorageTypeMemory); + } + + if (info->store_on_disk) + if (info->tablespace) + tab.setTablespace(info->tablespace); + else + tab.setTablespace("DEFAULT-TS"); // No primary key, create shadow key as 64 bit, auto increment if (form->s->primary_key == MAX_KEY) { @@ -3969,7 +4340,7 @@ int ha_ndbcluster::create(const char *name, tab.addColumn(col); pk_length += 2; } - + // Make sure that blob tables don't have to big part size for (i= 0; i < form->s->fields; i++) { @@ -4003,7 +4374,12 @@ int ha_ndbcluster::create(const char *name, } } - ndb_set_fragmentation(tab, form, pk_length); + // Check partition info + partition_info *part_info= form->part_info; + if ((my_errno= set_up_partition_info(part_info, form, (void*)&tab))) + { + DBUG_RETURN(my_errno); + } if ((my_errno= check_ndb_connection())) DBUG_RETURN(my_errno); @@ -4022,20 +4398,182 @@ int ha_ndbcluster::create(const char *name, m_dbname, m_tabname)); // Create secondary indexes - my_errno= build_index_list(ndb, form, ILBP_CREATE); + my_errno= create_indexes(ndb, form); if (!my_errno) - my_errno= write_ndb_file(); + my_errno= write_ndb_file(name); + else + { + /* + Failed to create an index, + drop the table (and all its indexes) + */ + drop_ndb_table(); + } + +#ifdef HAVE_NDB_BINLOG + if (!my_errno) + { + NDB_SHARE *share= 0; + pthread_mutex_lock(&ndbcluster_mutex); + /* + First make sure we get a "fresh" share here, not an old trailing one...
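+ ("trailing" here meaning a share left over from an earlier incarnation of this table name; handle_trailing_share() below disposes of it)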
+ */ + { + uint length= (uint) strlen(name); + if ((share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) name, length))) + handle_trailing_share(share); + } + /* + get a new share + */ + + if (!(share= get_share(name, form, true, true))) + { + sql_print_error("NDB: allocating table share for %s failed", name); + /* my_errno is set */ + } + pthread_mutex_unlock(&ndbcluster_mutex); + + while (!IS_TMP_PREFIX(m_tabname)) + { + const NDBTAB *t= dict->getTable(m_tabname); + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name,m_dbname,m_tabname); + int do_event_op= ndb_binlog_running; + + if (!schema_share && + strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) + do_event_op= 1; + + /* + Always create an event for the table, as other mysql servers + expect it to be there. + */ + if (!ndbcluster_create_event(ndb, t, event_name.c_ptr(), share, + share && do_event_op /* push warning */)) + { + if (ndb_extra_logging) + sql_print_information("NDB Binlog: CREATE TABLE Event: %s", + event_name.c_ptr()); + if (share && do_event_op && + ndbcluster_create_event_ops(share, t, event_name.c_ptr())) + { + sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations." + " Event: %s", name); + /* a warning has been issued to the client */ + } + } + /* + warning has been issued if ndbcluster_create_event failed + and (share && do_event_op) + */ + if (share && !do_event_op) + share->flags|= NSF_NO_BINLOG; + ndbcluster_log_schema_op(current_thd, share, + current_thd->query, current_thd->query_length, + share->db, share->table_name, + 0, 0, + SOT_CREATE_TABLE); + break; + } + } +#endif /* HAVE_NDB_BINLOG */ DBUG_RETURN(my_errno); } +int ha_ndbcluster::create_handler_files(const char *file) +{ + const char *name; + Ndb* ndb; + const NDBTAB *tab; + const void *data, *pack_data; + uint length, pack_length; + int error= 0; + + DBUG_ENTER("create_handler_files"); + + if (!(ndb= get_ndb())) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + NDBDICT *dict= ndb->getDictionary(); + if (!(tab= dict->getTable(m_tabname))) + DBUG_RETURN(0); // Must be a create, ignore since frm is saved in create + DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED); + name= table->s->normalized_path.str; + DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, name)); + if (readfrm(name, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + DBUG_PRINT("info", ("Missing frm for %s", m_tabname)); + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); + error= 1; + } + else + { + DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb", + m_tabname)); + error= table_changed(pack_data, pack_length); + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR)); + } + set_ndb_share_state(m_share, NSS_INITIAL); + free_share(&m_share); // Decrease ref_count + + DBUG_RETURN(error); +} + +int ha_ndbcluster::create_index(const char *name, KEY *key_info, + NDB_INDEX_TYPE idx_type, uint idx_no) +{ + int error= 0; + char unique_name[FN_LEN]; + static const char* unique_suffix= "$unique"; + DBUG_ENTER("ha_ndbcluster::create_index"); + DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name)); + + if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) + { + strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS); + DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", + unique_name, idx_no)); + } + + switch (idx_type){ + case
PRIMARY_KEY_INDEX: + // Do nothing, already created + break; + case PRIMARY_KEY_ORDERED_INDEX: + error= create_ordered_index(name, key_info); + break; + case UNIQUE_ORDERED_INDEX: + if (!(error= create_ordered_index(name, key_info))) + error= create_unique_index(unique_name, key_info); + break; + case UNIQUE_INDEX: + if (!(error= check_index_fields_not_null(idx_no))) + error= create_unique_index(unique_name, key_info); + break; + case ORDERED_INDEX: + error= create_ordered_index(name, key_info); + break; + default: + DBUG_ASSERT(FALSE); + break; + } + + DBUG_RETURN(error); +} int ha_ndbcluster::create_ordered_index(const char *name, KEY *key_info) { DBUG_ENTER("ha_ndbcluster::create_ordered_index"); - DBUG_RETURN(create_index(name, key_info, FALSE)); + DBUG_RETURN(create_ndb_index(name, key_info, FALSE)); } int ha_ndbcluster::create_unique_index(const char *name, @@ -4043,7 +4581,7 @@ int ha_ndbcluster::create_unique_index(const char *name, { DBUG_ENTER("ha_ndbcluster::create_unique_index"); - DBUG_RETURN(create_index(name, key_info, TRUE)); + DBUG_RETURN(create_ndb_index(name, key_info, TRUE)); } @@ -4051,9 +4589,9 @@ int ha_ndbcluster::create_unique_index(const char *name, Create an index in NDB Cluster */ -int ha_ndbcluster::create_index(const char *name, - KEY *key_info, - bool unique) +int ha_ndbcluster::create_ndb_index(const char *name, + KEY *key_info, + bool unique) { Ndb *ndb= get_ndb(); NdbDictionary::Dictionary *dict= ndb->getDictionary(); @@ -4089,6 +4627,99 @@ int ha_ndbcluster::create_index(const char *name, DBUG_RETURN(0); } +/* + Add an index on-line to a table +*/ +int ha_ndbcluster::add_index(TABLE *table_arg, + KEY *key_info, uint num_of_keys) +{ + DBUG_ENTER("ha_ndbcluster::add_index"); + DBUG_PRINT("info", ("ha_ndbcluster::add_index to table %s", + table_arg->s->table_name)); + int error= 0; + uint idx; + + DBUG_ASSERT(m_share->state == NSS_INITIAL); + for (idx= 0; idx < num_of_keys; idx++) + { + KEY *key= key_info + idx; + KEY_PART_INFO *key_part= key->key_part; + KEY_PART_INFO *end= key_part + key->key_parts; + NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key); + DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name)); + // Add fields to key_part struct + for (; key_part != end; key_part++) + key_part->field= table->field[key_part->fieldnr]; + // Check index type + // Create index in ndb + if((error= create_index(key_info[idx].name, key, idx_type, idx))) + break; + } + if (!error) + { + ndbcluster_get_share(m_share); // Increase ref_count + set_ndb_share_state(m_share, NSS_ALTERED); + } + DBUG_RETURN(error); +} + +/* + Drop an index in ndb + */ +int ha_ndbcluster::drop_ndb_index(const char *name) +{ + DBUG_ENTER("ha_ndbcluster::drop_ndb_index"); + DBUG_PRINT("enter", ("name: %s ", name)); + Ndb *ndb= get_ndb(); + NdbDictionary::Dictionary *dict= ndb->getDictionary(); + DBUG_RETURN(dict->dropIndex(name, m_tabname)); +} + +/* + Mark one or several indexes for deletion
and + renumber the remaining indexes +*/ +int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, + uint *key_num, uint num_of_keys) +{ + DBUG_ENTER("ha_ndbcluster::prepare_drop_index"); + DBUG_ASSERT(m_share->state == NSS_INITIAL); + // Mark indexes for deletion + uint idx; + for (idx= 0; idx < num_of_keys; idx++) + { + DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num)); + m_index[*key_num++].status= TO_BE_DROPPED; + } + // Renumber indexes + THD *thd= current_thd; + Thd_ndb *thd_ndb= get_thd_ndb(thd); + Ndb *ndb= thd_ndb->ndb; + renumber_indexes(ndb, table_arg); + ndbcluster_get_share(m_share); // Increase ref_count + set_ndb_share_state(m_share, NSS_ALTERED); + DBUG_RETURN(0); +} + +/* + Really drop all indexes marked for deletion +*/ +int ha_ndbcluster::final_drop_index(TABLE *table_arg) +{ + int error; + DBUG_ENTER("ha_ndbcluster::final_drop_index"); + DBUG_PRINT("info", ("ha_ndbcluster::final_drop_index")); + // Really drop indexes + THD *thd= current_thd; + Thd_ndb *thd_ndb= get_thd_ndb(thd); + Ndb *ndb= thd_ndb->ndb; + if((error= drop_indexes(ndb, table_arg))) + { + m_share->state= NSS_INITIAL; + free_share(&m_share); // Decrease ref_count + } + DBUG_RETURN(error); +} /* Rename a table in NDB Cluster @@ -4097,13 +4728,14 @@ int ha_ndbcluster::create_index(const char *name, int ha_ndbcluster::rename_table(const char *from, const char *to) { NDBDICT *dict; + char old_dbname[FN_HEADLEN]; char new_tabname[FN_HEADLEN]; const NDBTAB *orig_tab; int result; DBUG_ENTER("ha_ndbcluster::rename_table"); DBUG_PRINT("info", ("Renaming %s to %s", from, to)); - set_dbname(from); + set_dbname(from, old_dbname); set_tabname(from); set_tabname(to, new_tabname); @@ -4111,6 +4743,7 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); Ndb *ndb= get_ndb(); + ndb->setDatabaseName(old_dbname); dict= ndb->getDictionary(); if (!(orig_tab= dict->getTable(m_tabname))) ERR_RETURN(dict->getNdbError()); @@ -4121,15 +4754,93 @@ int ha_ndbcluster::rename_table(const char *from, const char *to) if (!(orig_tab= dict->getTable(m_tabname))) ERR_RETURN(dict->getNdbError()); } +#ifdef HAVE_NDB_BINLOG + NDB_SHARE *share= get_share(from, 0, false); + if (share) + { + int r= rename_share(share, to); + DBUG_ASSERT(r == 0); + } +#endif m_table= (void *)orig_tab; // Change current database to that of target table set_dbname(to); ndb->setDatabaseName(m_dbname); - if (!(result= alter_table_name(new_tabname))) + + if ((result= alter_table_name(new_tabname))) { - // Rename .ndb file - result= handler::rename_table(from, to); +#ifdef HAVE_NDB_BINLOG + if (share) + { + int r= rename_share(share, from); + DBUG_ASSERT(r == 0); + free_share(&share); + } +#endif + DBUG_RETURN(result); } + + // Rename .ndb file + if ((result= handler::rename_table(from, to))) + { + // ToDo in 4.1 should rollback alter table... 
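+ /* + A sketch of what the missing rollback would involve (hypothetical, + per the ToDo above): rename the NDB table back from new_tabname to + the original m_tabname, and restore the share key to 'from' the + same way the alter_table_name() failure path above does. + */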
+#ifdef HAVE_NDB_BINLOG + if (share) + free_share(&share); +#endif + DBUG_RETURN(result); + } + +#ifdef HAVE_NDB_BINLOG + int is_old_table_tmpfile= 1; + if (share && share->op) + dict->forceGCPWait(); + + /* handle old table */ + if (!IS_TMP_PREFIX(m_tabname)) + { + is_old_table_tmpfile= 0; + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, from + sizeof(share_prefix) - 1, 0); + ndbcluster_handle_drop_table(ndb, event_name.c_ptr(), share); + } + + if (!result && !IS_TMP_PREFIX(new_tabname)) + { + /* always create an event for the table */ + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, to + sizeof(share_prefix) - 1, 0); + const NDBTAB *ndbtab= dict->getTable(new_tabname); + + if (!ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share, + share && ndb_binlog_running /* push warning */)) + { + if (ndb_extra_logging) + sql_print_information("NDB Binlog: RENAME Event: %s", + event_name.c_ptr()); + if (share && ndb_binlog_running && + ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr())) + { + sql_print_error("NDB Binlog: FAILED create event operations " + "during RENAME. Event %s", event_name.c_ptr()); + /* a warning has been issued to the client */ + } + } + /* + warning has been issued if ndbcluster_create_event failed + and (share && ndb_binlog_running) + */ + if (!is_old_table_tmpfile) + ndbcluster_log_schema_op(current_thd, share, + current_thd->query, current_thd->query_length, + m_dbname, new_tabname, + 0, 0, + SOT_RENAME_TABLE, + old_dbname, m_tabname); + } + if (share) + free_share(&share); +#endif DBUG_RETURN(result); } @@ -4144,7 +4855,8 @@ int ha_ndbcluster::alter_table_name(const char *to) Ndb *ndb= get_ndb(); NDBDICT *dict= ndb->getDictionary(); const NDBTAB *orig_tab= (const NDBTAB *) m_table; - DBUG_ENTER("alter_table_name_table"); + DBUG_ENTER("alter_table_name"); + DBUG_PRINT("info", ("from: %s to: %s", orig_tab->getName(), to)); NdbDictionary::Table new_tab= *orig_tab; new_tab.setName(to); @@ -4163,6 +4875,107 @@ int ha_ndbcluster::alter_table_name(const char *to) */ +/* static version which does not need a handler */ + +int +ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name) +{ + DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table"); + NDBDICT *dict= ndb->getDictionary(); +#ifdef HAVE_NDB_BINLOG + NDB_SHARE *share= get_share(path, 0, false); +#endif + + /* Drop the table from NDB */ + + int res; + if (h) + { + res= h->drop_ndb_table(); + } + else + { + ndb->setDatabaseName(db); + res= dict->dropTable(table_name); + } + + if (res) + { +#ifdef HAVE_NDB_BINLOG + /* the drop table failed for some reason, drop the share anyways */ + if (share) + { + pthread_mutex_lock(&ndbcluster_mutex); + if (share->state != NSS_DROPPED) + { + /* + The share kept by the server has not been freed, free it + */ + share->state= NSS_DROPPED; + free_share(&share, TRUE); + } + /* free the share taken above */ + free_share(&share, TRUE); + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + DBUG_RETURN(res); + } + +#ifdef HAVE_NDB_BINLOG + /* stop the logging of the dropped table, and cleanup */ + + /* + drop table is successful even if table does not exist in ndb + and in case table was actually not dropped, there is no need + to force a gcp, and setting the event_name to null will indicate + that there is no event to be dropped + */ + int table_dropped= dict->getNdbError().code != 709; + + if (!IS_TMP_PREFIX(table_name) && share) + { + 
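+ /* + Log the schema operation so that other mysqld servers attached to + the cluster are told about the drop and can invalidate their own + copies of the table definition. + */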
ndbcluster_log_schema_op(current_thd, share, + current_thd->query, current_thd->query_length, + share->db, share->table_name, + 0, 0, + SOT_DROP_TABLE); + } + else if (table_dropped && share && share->op) /* ndbcluster_log_schema_op + will do a force GCP */ + dict->forceGCPWait(); + + if (!IS_TMP_PREFIX(table_name)) + { + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, path + sizeof(share_prefix) - 1, 0); + ndbcluster_handle_drop_table(ndb, + table_dropped ? event_name.c_ptr() : 0, + share); + } + + if (share) + { + pthread_mutex_lock(&ndbcluster_mutex); + if (share->state != NSS_DROPPED) + { + /* + The share kept by the server has not been freed, free it + */ + share->state= NSS_DROPPED; + free_share(&share, TRUE); + } + /* free the share taken above */ + free_share(&share, TRUE); + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + DBUG_RETURN(0); +} + int ha_ndbcluster::delete_table(const char *name) { DBUG_ENTER("ha_ndbcluster::delete_table"); @@ -4175,9 +4988,8 @@ int ha_ndbcluster::delete_table(const char *name) /* Call ancestor function to delete .ndb file */ handler::delete_table(name); - - /* Drop the table from NDB */ - DBUG_RETURN(drop_table()); + + DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname)); } @@ -4185,14 +4997,13 @@ int ha_ndbcluster::delete_table(const char *name) Drop table in NDB Cluster */ -int ha_ndbcluster::drop_table() +int ha_ndbcluster::drop_ndb_table() { Ndb *ndb= get_ndb(); NdbDictionary::Dictionary *dict= ndb->getDictionary(); - DBUG_ENTER("drop_table"); + DBUG_ENTER("intern_drop_table"); DBUG_PRINT("enter", ("Deleting %s", m_tabname)); - release_metadata(); if (dict->dropTable(m_tabname)) ERR_RETURN(dict->getNdbError()); @@ -4243,26 +5054,32 @@ ulonglong ha_ndbcluster::get_auto_increment() Constructor for the NDB Cluster table handler */ -ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): +#define HA_NDBCLUSTER_TABLE_FLAGS \ + HA_REC_NOT_IN_SEQ | \ + HA_NULL_IN_KEY | \ + HA_AUTO_PART_KEY | \ + HA_NO_PREFIX_CHAR_KEYS | \ + HA_NEED_READ_RANGE_BUFFER | \ + HA_CAN_GEOMETRY | \ + HA_CAN_BIT_FIELD | \ + HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS + +ha_ndbcluster::ha_ndbcluster(TABLE_SHARE *table_arg): handler(&ndbcluster_hton, table_arg), m_active_trans(NULL), m_active_cursor(NULL), m_table(NULL), m_table_version(-1), m_table_info(NULL), - m_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | - HA_AUTO_PART_KEY | - HA_NO_PREFIX_CHAR_KEYS | - HA_NEED_READ_RANGE_BUFFER | - HA_CAN_GEOMETRY | - HA_CAN_BIT_FIELD), + m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS), m_share(0), + m_part_info(NULL), + m_use_partition_function(FALSE), + m_sorted(FALSE), m_use_write(FALSE), m_ignore_dup_key(FALSE), m_primary_key_update(FALSE), - m_retrieve_all_fields(FALSE), - m_retrieve_primary_key(FALSE), + m_ignore_no_key(FALSE), m_rows_to_insert((ha_rows) 1), m_rows_inserted((ha_rows) 0), m_bulk_insert_rows((ha_rows) 1024), @@ -4291,13 +5108,7 @@ ha_ndbcluster::ha_ndbcluster(TABLE *table_arg): records= ~(ha_rows)0; // uninitialized block_size= 1024; - for (i= 0; i < MAX_KEY; i++) - { - m_index[i].type= UNDEFINED_INDEX; - m_index[i].unique_index= NULL; - m_index[i].index= NULL; - m_index[i].unique_index_attrid_map= NULL; - } + clear_indexes(); DBUG_VOID_RETURN; } @@ -4312,7 +5123,9 @@ ha_ndbcluster::~ha_ndbcluster() DBUG_ENTER("~ha_ndbcluster"); if (m_share) - free_share(m_share); + { + free_share(&m_share); + } release_metadata(); my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR)); m_blobs_buffer= 0; @@ -4338,27 +5151,33 @@ ha_ndbcluster::~ha_ndbcluster() Open 
a table for further use - fetch metadata for this table from NDB - check that table exists + + RETURN + 0 ok + < 0 Table has changed */ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) { int res; KEY *key; - DBUG_ENTER("open"); - DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", + DBUG_ENTER("ha_ndbcluster::open"); + DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", name, mode, test_if_locked)); - // Setup ref_length to make room for the whole - // primary key to be written in the ref variable + /* + Setup ref_length to make room for the whole + primary key to be written in the ref variable + */ - if (table->s->primary_key != MAX_KEY) + if (table_share->primary_key != MAX_KEY) { - key= table->key_info+table->s->primary_key; + key= table->key_info+table_share->primary_key; ref_length= key->key_length; DBUG_PRINT("info", (" ref_length: %d", ref_length)); } // Init table lock structure - if (!(m_share=get_share(name))) + if (!(m_share=get_share(name, table))) DBUG_RETURN(1); thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0); @@ -4366,7 +5185,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) set_tabname(name); if (check_ndb_connection()) { - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; DBUG_RETURN(HA_ERR_NO_CONNECTION); } @@ -4377,6 +5197,28 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(res); } +/* + Set partition info + + SYNOPSIS + set_part_info() + part_info + + RETURN VALUE + NONE + + DESCRIPTION + Set up partition info when handler object created +*/ + +void ha_ndbcluster::set_part_info(partition_info *part_info) +{ + m_part_info= part_info; + if (!(m_part_info->part_type == HASH_PARTITION && + m_part_info->list_of_part_fields && + !m_part_info->is_sub_partitioned())) + m_use_partition_function= TRUE; +} /* Close the table @@ -4386,7 +5228,8 @@ int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) int ha_ndbcluster::close(void) { DBUG_ENTER("close"); - free_share(m_share); m_share= 0; + free_share(&m_share); + m_share= 0; release_metadata(); DBUG_RETURN(0); } @@ -4458,7 +5301,7 @@ int ha_ndbcluster::check_ndb_connection(THD* thd) } -int ndbcluster_close_connection(THD *thd) +static int ndbcluster_close_connection(THD *thd) { Thd_ndb *thd_ndb= get_thd_ndb(thd); DBUG_ENTER("ndbcluster_close_connection"); @@ -4482,41 +5325,66 @@ int ndbcluster_discover(THD* thd, const char *db, const char *name, const void* data; const NDBTAB* tab; Ndb* ndb; + char key[FN_REFLEN]; DBUG_ENTER("ndbcluster_discover"); DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); ndb->setDatabaseName(db); - NDBDICT* dict= ndb->getDictionary(); dict->set_local_table_data_size(sizeof(Ndb_local_table_statistics)); dict->invalidateTable(name); - if (!(tab= dict->getTable(name))) - { - const NdbError err= dict->getNdbError(); - if (err.code == 709) - DBUG_RETURN(-1); - ERR_RETURN(err); - } - DBUG_PRINT("info", ("Found table %s", tab->getName())); - - len= tab->getFrmLength(); - if (len == 0 || tab->getFrmData() == NULL) + build_table_filename(key, sizeof(key), db, name, ""); + NDB_SHARE *share= get_share(key, 0, false); + if (share && get_ndb_share_state(share) == NSS_ALTERED) { - DBUG_PRINT("error", ("No frm data found.")); - DBUG_RETURN(1); + // Frm has been altered on disk, but not yet written to ndb + if (readfrm(key, &data, &len)) + { + DBUG_PRINT("error", ("Could not read frm")); + if 
(share) + free_share(&share); + DBUG_RETURN(1); + } } - - if (unpackfrm(&data, &len, tab->getFrmData())) + else { - DBUG_PRINT("error", ("Could not unpack table")); - DBUG_RETURN(1); + if (!(tab= dict->getTable(name))) + { + const NdbError err= dict->getNdbError(); + if (share) + free_share(&share); + if (err.code == 709 || err.code == 723) + DBUG_RETURN(-1); + ERR_RETURN(err); + } + DBUG_PRINT("info", ("Found table %s", tab->getName())); + + len= tab->getFrmLength(); + if (len == 0 || tab->getFrmData() == NULL) + { + DBUG_PRINT("error", ("No frm data found.")); + if (share) + free_share(&share); + DBUG_RETURN(1); + } + + if (unpackfrm(&data, &len, tab->getFrmData())) + { + DBUG_PRINT("error", ("Could not unpack table")); + if (share) + free_share(&share); + DBUG_RETURN(1); + } } *frmlen= len; *frmblob= data; + if (share) + free_share(&share); + DBUG_RETURN(0); } @@ -4542,7 +5410,7 @@ int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name if (!(tab= dict->getTable(name))) { const NdbError err= dict->getNdbError(); - if (err.code == 709) + if (err.code == 709 || err.code == 723) DBUG_RETURN(0); ERR_RETURN(err); } @@ -4563,9 +5431,10 @@ extern "C" byte* tables_get_key(const char *entry, uint *length, /* Drop a database in NDB Cluster - */ + NOTE add a dummy void function, since stupid handlerton is returning void instead of int... +*/ -int ndbcluster_drop_database(const char *path) +int ndbcluster_drop_database_impl(const char *path) { DBUG_ENTER("ndbcluster_drop_database"); THD *thd= current_thd; @@ -4580,33 +5449,38 @@ int ndbcluster_drop_database(const char *path) DBUG_PRINT("enter", ("db: %s", dbname)); if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); + DBUG_RETURN(-1); // List tables in NDB NDBDICT *dict= ndb->getDictionary(); if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); + DBUG_RETURN(-1); for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, dbname)) + if (my_strcasecmp(system_charset_info, elmt.database, dbname)) continue; - DBUG_PRINT("info", ("%s must be dropped", t.name)); - drop_list.push_back(thd->strdup(t.name)); + DBUG_PRINT("info", ("%s must be dropped", elmt.name)); + drop_list.push_back(thd->strdup(elmt.name)); } // Drop any tables belonging to database + char full_path[FN_REFLEN]; + char *tmp= full_path + + build_table_filename(full_path, sizeof(full_path), dbname, "", ""); + ndb->setDatabaseName(dbname); List_iterator_fast<char> it(drop_list); while ((tabname=it++)) { - if (dict->dropTable(tabname)) + tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1); + if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname)) { const NdbError err= dict->getNdbError(); - if (err.code != 709) + if (err.code != 709 && err.code != 723) { ERR_PRINT(err); ret= ndb_to_mysql_error(&err); @@ -4616,6 +5490,148 @@ int ndbcluster_drop_database(const char *path) DBUG_RETURN(ret); } +static void ndbcluster_drop_database(char *path) +{ + ndbcluster_drop_database_impl(path); +#ifdef HAVE_NDB_BINLOG + char db[FN_REFLEN]; + ha_ndbcluster::set_dbname(path, db); + ndbcluster_log_schema_op(current_thd, 0, + current_thd->query, 
current_thd->query_length, + db, "", 0, 0, SOT_DROP_DB); +#endif +} +/* + find all tables in ndb and discover those needed +*/ +int ndb_create_table_from_engine(THD *thd, const char *db, + const char *table_name) +{ + LEX *old_lex= thd->lex, newlex; + thd->lex= &newlex; + newlex.current_select= NULL; + lex_start(thd, (const uchar*) "", 0); + int res= ha_create_table_from_engine(thd, db, table_name); + thd->lex= old_lex; + return res; +} + +int ndbcluster_find_all_files(THD *thd) +{ + DBUG_ENTER("ndbcluster_find_all_files"); + Ndb* ndb; + char key[FN_REFLEN]; + NdbDictionary::Dictionary::List list; + + if (!(ndb= check_ndb_in_thd(thd))) + DBUG_RETURN(HA_ERR_NO_CONNECTION); + + NDBDICT *dict= ndb->getDictionary(); + + int unhandled, retries= 5; + do + { + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) + ERR_RETURN(dict->getNdbError()); + unhandled= 0; + for (uint i= 0 ; i < list.count ; i++) + { + NDBDICT::List::Element& elmt= list.elements[i]; + int do_handle_table= 0; + if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) + { + DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); + continue; + } + DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name)); + if (elmt.state == NDBOBJ::StateOnline || + elmt.state == NDBOBJ::StateBackup) + do_handle_table= 1; + else if (!(elmt.state == NDBOBJ::StateBuilding)) + { + sql_print_information("NDB: skipping setup table %s.%s, in state %d", + elmt.database, elmt.name, elmt.state); + continue; + } + + ndb->setDatabaseName(elmt.database); + const NDBTAB *ndbtab; + + if (!(ndbtab= dict->getTable(elmt.name))) + { + if (do_handle_table) + sql_print_error("NDB: failed to setup table %s.%s, error: %d, %s", + elmt.database, elmt.name, + dict->getNdbError().code, + dict->getNdbError().message); + unhandled++; + continue; + } + + if (ndbtab->getFrmLength() == 0) + continue; + + /* check if database exists */ + char *end= key + + build_table_filename(key, sizeof(key), elmt.database, "", ""); + if (my_access(key, F_OK)) + { + /* no such database defined, skip table */ + continue; + } + /* finalize construction of path */ + end+= tablename_to_filename(elmt.name, end, + sizeof(key)-(end-key)); + const void *data= 0, *pack_data= 0; + uint length, pack_length; + int discover= 0; + if (readfrm(key, &data, &length) || + packfrm(data, length, &pack_data, &pack_length)) + { + discover= 1; + sql_print_information("NDB: missing frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + else if (cmp_frm(ndbtab, pack_data, pack_length)) + { + NDB_SHARE *share= get_share(key, 0, false); + if (!share || get_ndb_share_state(share) != NSS_ALTERED) + { + discover= 1; + sql_print_information("NDB: mismatch in frm for %s.%s, discovering...", + elmt.database, elmt.name); + } + if (share) + free_share(&share); + } + my_free((char*) data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) pack_data, MYF(MY_ALLOW_ZERO_PTR)); + + pthread_mutex_lock(&LOCK_open); + if (discover) + { + /* ToDo 4.1 database needs to be created if missing */ + if (ndb_create_table_from_engine(thd, elmt.database, elmt.name)) + { + /* ToDo 4.1 handle error */ + } + } +#ifdef HAVE_NDB_BINLOG + else + { + /* set up replication for this table */ + ndbcluster_create_binlog_setup(ndb, key, end-key, + elmt.database, elmt.name, + TRUE); + } +#endif + pthread_mutex_unlock(&LOCK_open); + } + } + while (unhandled && retries--); + + DBUG_RETURN(0); +} int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> 
*files) @@ -4627,7 +5643,7 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, Ndb* ndb; char name[FN_REFLEN]; HASH ndb_tables, ok_tables; - NdbDictionary::Dictionary::List list; + NDBDICT::List list; if (!(ndb= check_ndb_in_thd(thd))) DBUG_RETURN(HA_ERR_NO_CONNECTION); @@ -4658,11 +5674,16 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, for (i= 0 ; i < list.count ; i++) { - NdbDictionary::Dictionary::List::Element& t= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name)); + NDBDICT::List::Element& elmt= list.elements[i]; + if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) + { + DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); + continue; + } + DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, t.database, db)) + if (my_strcasecmp(system_charset_info, elmt.database, db)) continue; // Apply wildcard to list of tables in NDB @@ -4670,14 +5691,14 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, { if (lower_case_table_names) { - if (wild_case_compare(files_charset_info, t.name, wild)) + if (wild_case_compare(files_charset_info, elmt.name, wild)) continue; } - else if (wild_compare(t.name,wild,0)) + else if (wild_compare(elmt.name,wild,0)) continue; } - DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name)); - my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name)); + DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name)); + my_hash_insert(&ndb_tables, (byte*)thd->strdup(elmt.name)); } char *file_name; @@ -4695,10 +5716,9 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, } // File is not in NDB, check for .ndb file with this name - (void)strxnmov(name, FN_REFLEN, - mysql_data_home,"/",db,"/",file_name,ha_ndb_ext,NullS); + build_table_filename(name, sizeof(name), db, file_name, ha_ndb_ext); DBUG_PRINT("info", ("Check access for %s", name)); - if (access(name, F_OK)) + if (my_access(name, F_OK)) { DBUG_PRINT("info", ("%s did not exist on disk", name)); // .ndb file did not exist on disk, another table type @@ -4717,6 +5737,24 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, } } +#ifdef HAVE_NDB_BINLOG + /* setup logging to binlog for all discovered tables */ + { + char *end, *end1= name + + build_table_filename(name, sizeof(name), db, "", ""); + for (i= 0; i < ok_tables.records; i++) + { + file_name= (char*)hash_element(&ok_tables, i); + end= end1 + + tablename_to_filename(file_name, end1, sizeof(name) - (end1 - name)); + pthread_mutex_lock(&LOCK_open); + ndbcluster_create_binlog_setup(ndb, name, end-name, + db, file_name, TRUE); + pthread_mutex_unlock(&LOCK_open); + } + } +#endif + // Check for new files to discover DBUG_PRINT("info", ("Checking for new files to discover")); List<char> create_list; @@ -4725,10 +5763,14 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, file_name= hash_element(&ndb_tables, i); if (!hash_search(&ok_tables, file_name, strlen(file_name))) { - DBUG_PRINT("info", ("%s must be discovered", file_name)); - // File is in list of ndb tables and not in ok_tables - // This table need to be created - create_list.push_back(thd->strdup(file_name)); + build_table_filename(name, sizeof(name), db, file_name, reg_ext); + if (my_access(name, F_OK)) + { + DBUG_PRINT("info", ("%s must be discovered", file_name)); + // File is in list of ndb tables and not in ok_tables + // 
This table need to be created + create_list.push_back(thd->strdup(file_name)); + } } } @@ -4762,11 +5804,11 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, while ((file_name=it2++)) { DBUG_PRINT("info", ("Table %s need discovery", file_name)); - if (ha_create_table_from_engine(thd, db, file_name) == 0) + if (ndb_create_table_from_engine(thd, db, file_name) == 0) files->push_back(thd->strdup(file_name)); } - pthread_mutex_unlock(&LOCK_open); + pthread_mutex_unlock(&LOCK_open); hash_free(&ok_tables); hash_free(&ndb_tables); @@ -4784,10 +5826,18 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, static int connect_callback() { update_status_variables(g_ndb_cluster_connection); + + uint node_id, i= 0; + Ndb_cluster_connection_node_iter node_iter; + memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map)); + while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter))) + g_node_id_map[node_id]= i++; + + pthread_cond_signal(&COND_ndb_util_thread); return 0; } -bool ndbcluster_init() +static bool ndbcluster_init() { int res; DBUG_ENTER("ndbcluster_init"); @@ -4795,6 +5845,25 @@ bool ndbcluster_init() if (have_ndbcluster != SHOW_OPTION_YES) goto ndbcluster_init_error; + { + handlerton &h= ndbcluster_hton; + h.close_connection= ndbcluster_close_connection; + h.commit= ndbcluster_commit; + h.rollback= ndbcluster_rollback; + h.create= ndbcluster_create_handler; /* Create a new handler */ + h.drop_database= ndbcluster_drop_database; /* Drop a database */ + h.panic= ndbcluster_end; /* Panic call */ + h.show_status= ndbcluster_show_status; /* Show status */ + h.alter_tablespace= ndbcluster_alter_tablespace; /* Show status */ + h.partition_flags= ndbcluster_partition_flags; /* Partition flags */ + h.alter_table_flags=ndbcluster_alter_table_flags; /* Alter table flags */ + h.fill_files_table= ndbcluster_fill_files_table; +#ifdef HAVE_NDB_BINLOG + ndbcluster_binlog_init_handlerton(); +#endif + h.flags= HTON_TEMPORARY_NOT_SUPPORTED; + } + // Set connectstring if specified if (opt_ndbcluster_connectstring != 0) DBUG_PRINT("connectstring", ("%s", opt_ndbcluster_connectstring)); @@ -4862,6 +5931,12 @@ bool ndbcluster_init() (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0, (hash_get_key) ndbcluster_get_key,0,0); pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST); +#ifdef HAVE_NDB_BINLOG + /* start the ndb injector thread */ + if (ndbcluster_binlog_start()) + goto ndbcluster_init_error; +#endif /* HAVE_NDB_BINLOG */ + pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST); pthread_cond_init(&COND_ndb_util_thread, NULL); @@ -4892,26 +5967,13 @@ ndbcluster_init_error: DBUG_RETURN(TRUE); } - -/* - End use of the NDB Cluster table handler - - free all global variables allocated by - ndbcluster_init() -*/ - -bool ndbcluster_end() +static int ndbcluster_end(ha_panic_function type) { DBUG_ENTER("ndbcluster_end"); if (!ndbcluster_inited) DBUG_RETURN(0); - // Kill ndb utility thread - (void) pthread_mutex_lock(&LOCK_ndb_util_thread); - DBUG_PRINT("exit",("killing ndb util thread: %lx", ndb_util_thread)); - (void) pthread_cond_signal(&COND_ndb_util_thread); - (void) pthread_mutex_unlock(&LOCK_ndb_util_thread); - if (g_ndb) { #ifndef DBUG_OFF @@ -4941,6 +6003,23 @@ bool ndbcluster_end() DBUG_RETURN(0); } +void ha_ndbcluster::print_error(int error, myf errflag) +{ + DBUG_ENTER("ha_ndbcluster::print_error"); + DBUG_PRINT("enter", ("error = %d", error)); + + if (error == HA_ERR_NO_PARTITION_FOUND) + { + char buf[100]; + 
my_error(ER_NO_PARTITION_FOR_GIVEN_VALUE, MYF(0), + llstr(m_part_info->part_expr->val_int(), buf)); + } + else + handler::print_error(error, errflag); + DBUG_VOID_RETURN; +} + + /* Static error print function called from static handler method ndbcluster_commit @@ -4950,11 +6029,13 @@ bool ndbcluster_end() void ndbcluster_print_error(int error, const NdbOperation *error_op) { DBUG_ENTER("ndbcluster_print_error"); - TABLE tab; + TABLE_SHARE share; const char *tab_name= (error_op) ? error_op->getTableName() : ""; - tab.alias= (char *) tab_name; - ha_ndbcluster error_handler(&tab); - tab.file= &error_handler; + share.db.str= (char*) ""; + share.db.length= 0; + share.table_name.str= (char *) tab_name; + share.table_name.length= strlen(tab_name); + ha_ndbcluster error_handler(&share); error_handler.print_error(error, MYF(0)); DBUG_VOID_RETURN; } @@ -4965,8 +6046,10 @@ void ndbcluster_print_error(int error, const NdbOperation *error_op) */ void ha_ndbcluster::set_dbname(const char *path_name, char *dbname) { - char *end, *ptr; - + char *end, *ptr, *tmp_name; + char tmp_buff[FN_REFLEN]; + + tmp_name= tmp_buff; /* Scan name from the end */ ptr= strend(path_name)-1; while (ptr >= path_name && *ptr != '\\' && *ptr != '/') { @@ -4978,18 +6061,19 @@ void ha_ndbcluster::set_dbname(const char *path_name, char *dbname) ptr--; } uint name_len= end - ptr; - memcpy(dbname, ptr + 1, name_len); - dbname[name_len]= '\0'; + memcpy(tmp_name, ptr + 1, name_len); + tmp_name[name_len]= '\0'; #ifdef __WIN__ /* Put to lower case */ - ptr= dbname; + ptr= tmp_name; while (*ptr != '\0') { *ptr= tolower(*ptr); ptr++; } #endif + filename_to_tablename(tmp_name, dbname, FN_REFLEN); } /* @@ -5008,8 +6092,10 @@ void ha_ndbcluster::set_dbname(const char *path_name) void ha_ndbcluster::set_tabname(const char *path_name, char * tabname) { - char *end, *ptr; - + char *end, *ptr, *tmp_name; + char tmp_buff[FN_REFLEN]; + + tmp_name= tmp_buff; /* Scan name from the end */ end= strend(path_name)-1; ptr= end; @@ -5017,17 +6103,18 @@ ha_ndbcluster::set_tabname(const char *path_name, char * tabname) ptr--; } uint name_len= end - ptr; - memcpy(tabname, ptr + 1, end - ptr); - tabname[name_len]= '\0'; + memcpy(tmp_name, ptr + 1, end - ptr); + tmp_name[name_len]= '\0'; #ifdef __WIN__ /* Put to lower case */ - ptr= tabname; + ptr= tmp_name; while (*ptr != '\0') { *ptr= tolower(*ptr); ptr++; } #endif + filename_to_tablename(tmp_name, tabname, FN_REFLEN); } /* @@ -5062,6 +6149,84 @@ ha_ndbcluster::records_in_range(uint inx, key_range *min_key, (max_key && max_key->length == key_length))) DBUG_RETURN(1); + if ((idx_type == PRIMARY_KEY_ORDERED_INDEX || + idx_type == UNIQUE_ORDERED_INDEX || + idx_type == ORDERED_INDEX) && + m_index[inx].index_stat != NULL) + { + NDB_INDEX_DATA& d=m_index[inx]; + NDBINDEX* index=(NDBINDEX*)d.index; + Ndb* ndb=get_ndb(); + NdbTransaction* trans=NULL; + NdbIndexScanOperation* op=NULL; + int res=0; + Uint64 rows; + + do + { + // We must provide approx table rows + Uint64 table_rows=0; + Ndb_local_table_statistics *info= + (Ndb_local_table_statistics *)m_table_info; + if (info->records != ~(ha_rows)0 && info->records != 0) + { + table_rows = info->records; + DBUG_PRINT("info", ("use info->records: %llu", table_rows)); + } + else + { + Ndb_statistics stat; + if ((res=ndb_get_table_statistics(ndb, m_tabname, &stat)) != 0) + break; + table_rows=stat.row_count; + DBUG_PRINT("info", ("use db row_count: %llu", table_rows)); + if (table_rows == 0) { + // Problem if autocommit=0 +#ifdef 
ndb_get_table_statistics_uses_active_trans + rows=0; + break; +#endif + } + } + + // Define scan op for the range + if ((trans=m_active_trans) == NULL) + { + DBUG_PRINT("info", ("no active trans")); + if (! (trans=ndb->startTransaction())) + ERR_BREAK(ndb->getNdbError(), res); + } + if (! (op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table))) + ERR_BREAK(trans->getNdbError(), res); + if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1) + ERR_BREAK(op->getNdbError(), res); + const key_range *keys[2]={ min_key, max_key }; + if ((res=set_bounds(op, inx, true, keys)) != 0) + break; + + // Decide if db should be contacted + int flags=0; + if (d.index_stat_query_count < d.index_stat_cache_entries || + (d.index_stat_update_freq != 0 && + d.index_stat_query_count % d.index_stat_update_freq == 0)) + { + DBUG_PRINT("info", ("force stat from db")); + flags|=NdbIndexStat::RR_UseDb; + } + if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1) + ERR_BREAK(d.index_stat->getNdbError(), res); + d.index_stat_query_count++; + } while (0); + + if (trans != m_active_trans && rows == 0) + rows = 1; + if (trans != m_active_trans && trans != NULL) + ndb->closeTransaction(trans); + if (res != 0) + DBUG_RETURN(HA_POS_ERROR); + DBUG_RETURN(rows); + } + DBUG_RETURN(10); /* Good guess when you don't know anything */ } @@ -5074,7 +6239,7 @@ ulong ha_ndbcluster::table_flags(void) const } const char * ha_ndbcluster::table_type() const { - return("ndbcluster"); + return("NDBCLUSTER"); } uint ha_ndbcluster::max_supported_record_length() const { @@ -5132,11 +6297,11 @@ uint8 ha_ndbcluster::table_cache_type() uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, Uint64 *commit_count) { - DBUG_ENTER("ndb_get_commitcount"); - char name[FN_REFLEN]; NDB_SHARE *share; - (void)strxnmov(name, FN_REFLEN, "./",dbname,"/",tabname,NullS); + DBUG_ENTER("ndb_get_commitcount"); + + build_table_filename(name, sizeof(name), dbname, tabname, ""); DBUG_PRINT("enter", ("name: %s", name)); pthread_mutex_lock(&ndbcluster_mutex); if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, @@ -5144,8 +6309,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, strlen(name)))) { pthread_mutex_unlock(&ndbcluster_mutex); - DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", - name)); + DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", name)); DBUG_RETURN(1); } share->use_count++; @@ -5160,7 +6324,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, DBUG_PRINT("info", ("Getting commit_count: %llu from share", share->commit_count)); pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } } @@ -5175,7 +6339,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, struct Ndb_statistics stat; if (ndb_get_table_statistics(ndb, tabname, &stat)) { - free_share(share); + free_share(&share); DBUG_RETURN(1); } @@ -5192,7 +6356,7 @@ uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname, *commit_count= 0; } pthread_mutex_unlock(&share->mutex); - free_share(share); + free_share(&share); DBUG_RETURN(0); } @@ -5311,7 +6475,7 @@ ha_ndbcluster::register_query_cache_table(THD *thd, if (!is_autocommit) { - DBUG_PRINT("exit", ("Can't register table during transaction")) + DBUG_PRINT("exit", ("Can't register table during transaction")); DBUG_RETURN(FALSE); } @@ -5319,7 +6483,7 @@ ha_ndbcluster::register_query_cache_table(THD *thd, if (ndb_get_commitcount(thd, m_dbname, m_tabname, &commit_count)) { 
*engine_data= 0; - DBUG_PRINT("exit", ("Error, could not get commitcount")) + DBUG_PRINT("exit", ("Error, could not get commitcount")); DBUG_RETURN(FALSE); } *engine_data= commit_count; @@ -5337,169 +6501,404 @@ ha_ndbcluster::register_query_cache_table(THD *thd, data we want to or can share. */ -static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length, +static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length, my_bool not_used __attribute__((unused))) { - *length=share->table_name_length; - return (byte*) share->table_name; + *length= share->key_length; + return (byte*) share->key; } -static NDB_SHARE* get_share(const char *table_name) +#ifndef DBUG_OFF +static void dbug_print_open_tables() +{ + DBUG_ENTER("dbug_print_open_tables"); + for (uint i= 0; i < ndbcluster_open_tables.records; i++) + { + NDB_SHARE *share= (NDB_SHARE*) hash_element(&ndbcluster_open_tables, i); + DBUG_PRINT("share", + ("[%d] 0x%lx key: %s key_length: %d", + i, share, share->key, share->key_length)); + DBUG_PRINT("share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); +#ifdef HAVE_NDB_BINLOG + if (share->table) + DBUG_PRINT("share", + ("table->s->db.table_name: %s.%s", + share->table->s->db.str, share->table->s->table_name.str)); +#endif + } + DBUG_VOID_RETURN; +} +#else +#define dbug_print_open_tables() +#endif + +#ifdef HAVE_NDB_BINLOG +/* + For some reason a share is still around; try to salvage the situation by + closing all cached tables. If the share still exists after that, there + is an error somewhere, but only report it to the error log. Keep this + "trailing share" but rename it, since there are still references to it, + to avoid segmentation faults. There is a risk that the memory for this + trailing share leaks. + + Must be called with ndbcluster_mutex already locked by the caller. +*/ +int handle_trailing_share(NDB_SHARE *share) { - NDB_SHARE *share; + static ulong trailing_share_id= 0; + DBUG_ENTER("handle_trailing_share"); + + ++share->use_count; + pthread_mutex_unlock(&ndbcluster_mutex); + + close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, TRUE); + pthread_mutex_lock(&ndbcluster_mutex); - uint length=(uint) strlen(table_name); - if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables, - (byte*) table_name, - length))) + if (!--share->use_count) { - if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1, - MYF(MY_WME | MY_ZEROFILL)))) - { - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); - if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) - { - pthread_mutex_unlock(&ndbcluster_mutex); - my_free((gptr) share,0); - return 0; - } - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - share->commit_count= 0; - share->commit_count_lock= 0; - } - else + DBUG_PRINT("info", ("NDB_SHARE: close_cached_tables %s freed share.", + share->key)); + real_free_share(&share); + DBUG_RETURN(0); + } + + /* + The share still exists; if it has not been dropped by the server, + release that reference + */ + if (share->state != NSS_DROPPED && !--share->use_count) + { + DBUG_PRINT("info", ("NDB_SHARE: %s already exists, " + "use_count=%d state != NSS_DROPPED.", + share->key, share->use_count)); + real_free_share(&share); + DBUG_RETURN(0); + } + DBUG_PRINT("error", ("NDB_SHARE: %s already exists use_count=%d.", + share->key, share->use_count)); + + sql_print_error("NDB_SHARE: %s already exists use_count=%d." 
+ " Moving away for safety, but possible memleak.", + share->key, share->use_count); + dbug_print_open_tables(); + + /* + Ndb share has not been released as it should + */ + DBUG_ASSERT(FALSE); + + /* + This is probably an error. We can however save the situation + at the cost of a possible mem leak, by "renaming" the share + - First remove from hash + */ + hash_delete(&ndbcluster_open_tables, (byte*) share); + + /* + now give it a new name, just a running number + if space is not enough allocate some more + */ + { + const uint min_key_length= 10; + if (share->key_length < min_key_length) { - DBUG_PRINT("error", ("Failed to alloc share")); - pthread_mutex_unlock(&ndbcluster_mutex); - return 0; + share->key= alloc_root(&share->mem_root, min_key_length + 1); + share->key_length= min_key_length; } + share->key_length= + my_snprintf(share->key, min_key_length + 1, "#leak%d", + trailing_share_id++); } - share->use_count++; + /* Keep it for possible the future trailing free */ + my_hash_insert(&ndbcluster_open_tables, (byte*) share); - DBUG_PRINT("share", - ("table_name: %s, length: %d, use_count: %d, commit_count: %d", - share->table_name, share->table_name_length, share->use_count, - share->commit_count)); - pthread_mutex_unlock(&ndbcluster_mutex); - return share; + DBUG_RETURN(0); } - -static void free_share(NDB_SHARE *share) +/* + Rename share is used during rename table. +*/ +static int rename_share(NDB_SHARE *share, const char *new_key) { + NDB_SHARE *tmp; pthread_mutex_lock(&ndbcluster_mutex); - if (!--share->use_count) + uint new_length= (uint) strlen(new_key); + DBUG_PRINT("rename_share", ("old_key: %s old__length: %d", + share->key, share->key_length)); + if ((tmp= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) new_key, new_length))) + handle_trailing_share(tmp); + + /* remove the share from hash */ + hash_delete(&ndbcluster_open_tables, (byte*) share); + dbug_print_open_tables(); + + /* save old stuff if insert should fail */ + uint old_length= share->key_length; + char *old_key= share->key; + + /* + now allocate and set the new key, db etc + enough space for key, db, and table_name + */ + share->key= alloc_root(&share->mem_root, 2 * (new_length + 1)); + strmov(share->key, new_key); + share->key_length= new_length; + + if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) + { + // ToDo free the allocated stuff above? 
+ DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", + share->key)); + share->key= old_key; + share->key_length= old_length; + if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) + { + sql_print_error("rename_share: failed to recover %s", share->key); + DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", + share->key)); + } + dbug_print_open_tables(); + pthread_mutex_unlock(&ndbcluster_mutex); + return -1; + } + dbug_print_open_tables(); + + share->db= share->key + new_length + 1; + ha_ndbcluster::set_dbname(new_key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(new_key, share->table_name); + + DBUG_PRINT("rename_share", + ("0x%lx key: %s key_length: %d", + share, share->key, share->key_length)); + DBUG_PRINT("rename_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + if (share->table) { - hash_delete(&ndbcluster_open_tables, (byte*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free((gptr) share, MYF(0)); + DBUG_PRINT("rename_share", + ("table->s->db.table_name: %s.%s", + share->table->s->db.str, share->table->s->table_name.str)); + + if (share->op == 0) + { + share->table->s->db.str= share->db; + share->table->s->db.length= strlen(share->db); + share->table->s->table_name.str= share->table_name; + share->table->s->table_name.length= strlen(share->table_name); + } } + /* else rename will be handled when the ALTER event comes */ + share->old_names= old_key; + // ToDo free old_names after ALTER EVENT + pthread_mutex_unlock(&ndbcluster_mutex); + return 0; } +#endif +/* + Increase refcount on existing share. + Always returns share and cannot fail. +*/ +NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share) +{ + pthread_mutex_lock(&ndbcluster_mutex); + share->use_count++; + + dbug_print_open_tables(); + + DBUG_PRINT("get_share", + ("0x%lx key: %s key_length: %d", + share, share->key, share->key_length)); + DBUG_PRINT("get_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + pthread_mutex_unlock(&ndbcluster_mutex); + return share; +} /* - Internal representation of the frm blob - + Get a share object for key + + Returns share for key, and increases the refcount on the share. 
+ + create_if_not_exists == TRUE: + creates the share if it does not already exist + returns 0 only on out of memory, and then sets my_error + + create_if_not_exists == FALSE: + returns 0 if the share does not exist + + have_lock == TRUE: ndbcluster_mutex has already been locked by the caller */ -struct frm_blob_struct +NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table, + bool create_if_not_exists, + bool have_lock) { - struct frm_blob_header - { - uint ver; // Version of header - uint orglen; // Original length of compressed data - uint complen; // Compressed length of data, 0=uncompressed - } head; - char data[1]; -}; + THD *thd= current_thd; + NDB_SHARE *share; + uint length= (uint) strlen(key); + DBUG_ENTER("ndbcluster_get_share"); + DBUG_PRINT("enter", ("key: '%s'", key)); + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if (!(share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) key, + length))) + { + if (!create_if_not_exists) + { + DBUG_PRINT("error", ("get_share: %s does not exist", key)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } + if ((share= (NDB_SHARE*) my_malloc(sizeof(*share), + MYF(MY_WME | MY_ZEROFILL)))) + { + MEM_ROOT **root_ptr= + my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + MEM_ROOT *old_root= *root_ptr; + init_sql_alloc(&share->mem_root, 1024, 0); + *root_ptr= &share->mem_root; // remember to reset before return + share->state= NSS_INITIAL; + /* one buffer: the key first, then room to unpack db and table_name */ + share->key= alloc_root(*root_ptr, 2 * (length + 1)); + share->key_length= length; + strmov(share->key, key); + if (my_hash_insert(&ndbcluster_open_tables, (byte*) share)) + { + free_root(&share->mem_root, MYF(0)); + my_free((gptr) share, 0); + *root_ptr= old_root; + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + share->commit_count= 0; + share->commit_count_lock= 0; + share->db= share->key + length + 1; + ha_ndbcluster::set_dbname(key, share->db); + share->table_name= share->db + strlen(share->db) + 1; + ha_ndbcluster::set_tabname(key, share->table_name); +#ifdef HAVE_NDB_BINLOG + ndbcluster_binlog_init_share(share, table); +#endif + *root_ptr= old_root; + } + else + { + DBUG_PRINT("error", ("get_share: failed to alloc share")); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*share)); + DBUG_RETURN(0); + } + } + share->use_count++; + dbug_print_open_tables(); -static int packfrm(const void *data, uint len, - const void **pack_data, uint *pack_len) -{ - int error; - ulong org_len, comp_len; - uint blob_len; - frm_blob_struct* blob; - DBUG_ENTER("packfrm"); - DBUG_PRINT("enter", ("data: %x, len: %d", data, len)); - - error= 1; - org_len= len; - if (my_compress((byte*)data, &org_len, &comp_len)) - goto err; - - DBUG_PRINT("info", ("org_len: %d, comp_len: %d", org_len, comp_len)); - DBUG_DUMP("compressed", (char*)data, org_len); - - error= 2; - blob_len= sizeof(frm_blob_struct::frm_blob_header)+org_len; - if (!(blob= (frm_blob_struct*) my_malloc(blob_len,MYF(MY_WME)))) - goto err; - - // Store compressed blob in machine independent format - int4store((char*)(&blob->head.ver), 1); - int4store((char*)(&blob->head.orglen), comp_len); - int4store((char*)(&blob->head.complen), org_len); - - // Copy frm data into blob, already in machine independent format - memcpy(blob->data, data, org_len); - - *pack_data= blob; - *pack_len= blob_len; 
- error= 0; - - DBUG_PRINT("exit", ("pack_data: %x, pack_len: %d", *pack_data, *pack_len)); -err: - DBUG_RETURN(error); - + DBUG_PRINT("info", + ("0x%lx key: %s key_length: %d key: %s", + share, share->key, share->key_length, key)); + DBUG_PRINT("info", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(share); } -static int unpackfrm(const void **unpack_data, uint *unpack_len, - const void *pack_data) +void ndbcluster_real_free_share(NDB_SHARE **share) { - const frm_blob_struct *blob= (frm_blob_struct*)pack_data; - byte *data; - ulong complen, orglen, ver; - DBUG_ENTER("unpackfrm"); - DBUG_PRINT("enter", ("pack_data: %x", pack_data)); + DBUG_ENTER("ndbcluster_real_free_share"); + DBUG_PRINT("real_free_share", + ("0x%lx key: %s key_length: %d", + (*share), (*share)->key, (*share)->key_length)); + DBUG_PRINT("real_free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); - complen= uint4korr((char*)&blob->head.complen); - orglen= uint4korr((char*)&blob->head.orglen); - ver= uint4korr((char*)&blob->head.ver); - - DBUG_PRINT("blob",("ver: %d complen: %d orglen: %d", - ver,complen,orglen)); - DBUG_DUMP("blob->data", (char*) blob->data, complen); - - if (ver != 1) - DBUG_RETURN(1); - if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME)))) - DBUG_RETURN(2); - memcpy(data, blob->data, complen); - - if (my_uncompress(data, &complen, &orglen)) - { - my_free((char*)data, MYF(0)); - DBUG_RETURN(3); - } + hash_delete(&ndbcluster_open_tables, (byte*) *share); + thr_lock_delete(&(*share)->lock); + pthread_mutex_destroy(&(*share)->mutex); + free_root(&(*share)->mem_root, MYF(0)); + +#ifdef HAVE_NDB_BINLOG + if ((*share)->table) + { + closefrm((*share)->table, 0); +#if 0 // todo ? 
+ free_root(&(*share)->table->mem_root, MYF(0)); +#endif - *unpack_data= data; - *unpack_len= complen; +#ifndef DBUG_OFF + bzero((gptr)(*share)->table_share, sizeof(*(*share)->table_share)); + bzero((gptr)(*share)->table, sizeof(*(*share)->table)); +#endif + my_free((gptr) (*share)->table_share, MYF(0)); + my_free((gptr) (*share)->table, MYF(0)); +#ifndef DBUG_OFF + (*share)->table_share= 0; + (*share)->table= 0; +#endif + } +#endif + my_free((gptr) *share, MYF(0)); + *share= 0; + + dbug_print_open_tables(); + DBUG_VOID_RETURN; +} - DBUG_PRINT("exit", ("frmdata: %x, len: %d", *unpack_data, *unpack_len)); +/* + decrease refcount of share + calls real_free_share when refcount reaches 0 - DBUG_RETURN(0); + have_lock == TRUE, pthread_mutex_lock(&ndbcluster_mutex) already taken +*/ +void ndbcluster_free_share(NDB_SHARE **share, bool have_lock) +{ + if (!have_lock) + pthread_mutex_lock(&ndbcluster_mutex); + if ((*share)->util_lock == current_thd) + (*share)->util_lock= 0; + if (!--(*share)->use_count) + { + real_free_share(share); + } + else + { + dbug_print_open_tables(); + DBUG_PRINT("free_share", + ("0x%lx key: %s key_length: %d", + *share, (*share)->key, (*share)->key_length)); + DBUG_PRINT("free_share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + (*share)->db, (*share)->table_name, + (*share)->use_count, (*share)->commit_count)); + } + if (!have_lock) + pthread_mutex_unlock(&ndbcluster_mutex); } + static int ndb_get_table_statistics(Ndb* ndb, const char * table, @@ -5508,11 +6907,10 @@ ndb_get_table_statistics(Ndb* ndb, const char * table, DBUG_ENTER("ndb_get_table_statistics"); DBUG_PRINT("enter", ("table: %s", table)); NdbTransaction* pTrans= ndb->startTransaction(); + if (pTrans == NULL) + ERR_RETURN(ndb->getNdbError()); do { - if (pTrans == NULL) - break; - NdbScanOperation* pOp= pTrans->getNdbScanOperation(table); if (pOp == NULL) break; @@ -5583,17 +6981,17 @@ ndb_get_table_statistics(Ndb* ndb, const char * table, that the table with this name is a ndb table */ -int ha_ndbcluster::write_ndb_file() +int ha_ndbcluster::write_ndb_file(const char *name) { File file; bool error=1; char path[FN_REFLEN]; DBUG_ENTER("write_ndb_file"); - DBUG_PRINT("enter", ("db: %s, name: %s", m_dbname, m_tabname)); + DBUG_PRINT("enter", ("name: %s", name)); - (void)strxnmov(path, FN_REFLEN, - mysql_data_home,"/",m_dbname,"/",m_tabname,ha_ndb_ext,NullS); + (void)strxnmov(path, FN_REFLEN-1, + mysql_data_home,"/",name,ha_ndb_ext,NullS); if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { @@ -5612,14 +7010,15 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, HANDLER_BUFFER *buffer) { DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); + m_write_op= FALSE; int res; KEY* key_info= table->key_info + active_index; NDB_INDEX_TYPE index_type= get_index_type(active_index); - ulong reclength= table->s->reclength; + ulong reclength= table_share->reclength; NdbOperation* op; - if (uses_blob_value(m_retrieve_all_fields)) + if (uses_blob_value()) { /** * blobs can't be batched currently @@ -5671,12 +7070,35 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; multi_range_curr++) { - switch (index_type){ + part_id_range part_spec; + if (m_use_partition_function) + { + get_partition_set(table, curr, active_index, + &multi_range_curr->start_key, + &part_spec); + DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u", + part_spec.start_part, 
part_spec.end_part)); + /* + If partition pruning has found no partition in set + we can skip this scan + */ + if (part_spec.start_part > part_spec.end_part) + { + /* + We can skip this partition since the key won't fit into any + partition + */ + curr += reclength; + multi_range_curr->range_flag |= SKIP_RANGE; + continue; + } + } + switch(index_type){ case PRIMARY_KEY_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) + goto range; + // else fall through case PRIMARY_KEY_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5684,7 +7106,9 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, !op->readTuple(lm) && !set_primary_key(op, multi_range_curr->start_key.key) && !define_read_attrs(curr, op) && - (op->setAbortOption(AO_IgnoreError), TRUE)) + (op->setAbortOption(AO_IgnoreError), TRUE) && + (!m_use_partition_function || + (op->setPartitionId(part_spec.start_part), true))) curr += reclength; else ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError()); @@ -5693,11 +7117,11 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, break; case UNIQUE_ORDERED_INDEX: if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, multi_range_curr->start_key.key, - multi_range_curr->start_key.length))) - goto range; - /* fall through */ + multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && + !check_null_in_key(key_info, multi_range_curr->start_key.key, + multi_range_curr->start_key.length))) + goto range; + // else fall through case UNIQUE_INDEX: { multi_range_curr->range_flag |= UNIQUE_RANGE; @@ -5711,8 +7135,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, ERR_RETURN(op ? 
op->getNdbError() : m_active_trans->getNdbError()); break; } - case ORDERED_INDEX: - { + case ORDERED_INDEX: { range: multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE; if (scanOp == 0) @@ -5745,7 +7168,8 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, const key_range *keys[2]= { &multi_range_curr->start_key, &multi_range_curr->end_key }; - if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges))) + if ((res= set_bounds(scanOp, active_index, false, keys, + multi_range_curr-ranges))) DBUG_RETURN(res); break; } @@ -5787,7 +7211,7 @@ ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, } #if 0 -#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x); +#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x)); #else #define DBUG_MULTI_RANGE(x) #endif @@ -5798,19 +7222,26 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); if (m_disable_multi_read) { + DBUG_MULTI_RANGE(11); DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); } int res; int range_no; - ulong reclength= table->s->reclength; + ulong reclength= table_share->reclength; const NdbOperation* op= m_current_multi_operation; for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) { + DBUG_MULTI_RANGE(12); + if (multi_range_curr->range_flag & SKIP_RANGE) + continue; if (multi_range_curr->range_flag & UNIQUE_RANGE) { if (op->getNdbError().code == 0) + { + DBUG_MULTI_RANGE(13); goto found_next; + } op= m_active_trans->getNextCompletedOperation(op); m_multi_range_result_ptr += reclength; @@ -5827,6 +7258,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) } else { + DBUG_MULTI_RANGE(14); goto close_scan; } } @@ -5860,6 +7292,7 @@ ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) DBUG_ASSERT(range_no == -1); if ((res= m_multi_cursor->nextResult(true))) { + DBUG_MULTI_RANGE(15); goto close_scan; } multi_range_curr--; // Will be increased in for-loop @@ -5887,12 +7320,16 @@ close_scan: } else { + DBUG_MULTI_RANGE(9); DBUG_RETURN(ndb_err(m_active_trans)); } } if (multi_range_curr == multi_range_end) + { + DBUG_MULTI_RANGE(16); DBUG_RETURN(HA_ERR_END_OF_FILE); + } /** * Read remaining ranges @@ -5940,7 +7377,7 @@ ha_ndbcluster::setup_recattr(const NdbRecAttr* curr) Field **field, **end; NdbValue *value= m_value; - end= table->field + table->s->fields; + end= table->field + table_share->fields; for (field= table->field; field < end; field++, value++) { @@ -6001,6 +7438,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) THD *thd; /* needs to be first for thread_stack */ Ndb* ndb; struct timespec abstime; + List<NDB_SHARE> util_open_tables; my_thread_init(); DBUG_ENTER("ndb_util_thread"); @@ -6021,10 +7459,72 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) delete ndb; DBUG_RETURN(NULL); } + thd->init_for_queries(); + thd->version=refresh_version; + thd->set_time(); + thd->main_security_ctx.host_or_ip= ""; + thd->client_capabilities = 0; + my_net_init(&thd->net, 0); + thd->main_security_ctx.master_access= ~0; + thd->main_security_ctx.priv_user = 0; + + /* + wait for mysql server to start + */ + pthread_mutex_lock(&LOCK_server_started); + while (!mysqld_server_started) + pthread_cond_wait(&COND_server_started, &LOCK_server_started); + pthread_mutex_unlock(&LOCK_server_started); + + /* + Wait for cluster to start + */ + 
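+ /* + This is the standard condition-variable wait: hold the mutex, + re-check the predicate in a loop, and bound each wait with a timeout + so the loop can notice abort_loop. The same pattern in isolation + (a minimal sketch; the names here are illustrative only): + + pthread_mutex_lock(&mutex); + while (!ready && !abort_requested) + { + set_timespec(abstime, 1); // wake at least once per second + pthread_cond_timedwait(&cond, &mutex, &abstime); + } + pthread_mutex_unlock(&mutex); + */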
pthread_mutex_lock(&LOCK_ndb_util_thread); + while (!ndb_cluster_node_id && (ndbcluster_hton.slot != ~(uint)0)) + { + /* ndb not connected yet */ + set_timespec(abstime, 1); + pthread_cond_timedwait(&COND_ndb_util_thread, + &LOCK_ndb_util_thread, + &abstime); + if (abort_loop) + { + pthread_mutex_unlock(&LOCK_ndb_util_thread); + goto ndb_util_thread_end; + } + } + pthread_mutex_unlock(&LOCK_ndb_util_thread); + + { + Thd_ndb *thd_ndb; + if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) + { + sql_print_error("Could not allocate Thd_ndb object"); + goto ndb_util_thread_end; + } + set_thd_ndb(thd, thd_ndb); + thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; + } + +#ifdef HAVE_NDB_BINLOG + /* create tables needed by the replication */ + ndbcluster_setup_binlog_table_shares(thd); +#else + /* + Get all table definitions from the storage node + */ + ndbcluster_find_all_files(thd); +#endif + + ndbcluster_util_inited= 1; + +#ifdef HAVE_NDB_BINLOG + /* Signal injector thread that all is setup */ + pthread_cond_signal(&injector_cond); +#endif - List<NDB_SHARE> util_open_tables; set_timespec(abstime, 0); - for (;;) + for (;!abort_loop;) { pthread_mutex_lock(&LOCK_ndb_util_thread); @@ -6032,13 +7532,22 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) &LOCK_ndb_util_thread, &abstime); pthread_mutex_unlock(&LOCK_ndb_util_thread); - +#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d", ndb_cache_check_time)); - +#endif if (abort_loop) break; /* Shutting down server */ +#ifdef HAVE_NDB_BINLOG + /* + Check that the apply_status_share and schema_share has been created. + If not try to create it + */ + if (!apply_status_share || !schema_share) + ndbcluster_setup_binlog_table_shares(thd); +#endif + if (ndb_cache_check_time == 0) { /* Wake up in 1 second to check if value has changed */ @@ -6052,6 +7561,12 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) for (uint i= 0; i < ndbcluster_open_tables.records; i++) { share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i); +#ifdef HAVE_NDB_BINLOG + if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) + <= 0) + continue; // injector thread is the only user, skip statistics + share->util_lock= current_thd; // Mark that util thread has lock +#endif /* HAVE_NDB_BINLOG */ share->use_count++; /* Make sure the table can't be closed */ DBUG_PRINT("ndb_util_thread", ("Found open table[%d]: %s, use_count: %d", @@ -6066,20 +7581,23 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) List_iterator_fast<NDB_SHARE> it(util_open_tables); while ((share= it++)) { - /* Split tab- and dbname */ - char buf[FN_REFLEN]; - char *tabname, *db; - uint length= dirname_length(share->table_name); - tabname= share->table_name+length; - memcpy(buf, share->table_name, length-1); - buf[length-1]= 0; - db= buf+dirname_length(buf); +#ifdef HAVE_NDB_BINLOG + if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) + <= 1) + { + /* + Util thread and injector thread is the only user, skip statistics + */ + free_share(&share); + continue; + } +#endif /* HAVE_NDB_BINLOG */ DBUG_PRINT("ndb_util_thread", ("Fetching commit count for: %s", - share->table_name)); + share->key)); /* Contact NDB to get commit count for table */ - ndb->setDatabaseName(db); + ndb->setDatabaseName(share->db); struct Ndb_statistics stat; uint lock; @@ -6087,17 +7605,17 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) lock= 
share->commit_count_lock; pthread_mutex_unlock(&share->mutex); - if (ndb_get_table_statistics(ndb, tabname, &stat) == 0) + if (ndb_get_table_statistics(ndb, share->table_name, &stat) == 0) { DBUG_PRINT("ndb_util_thread", ("Table: %s, commit_count: %llu, rows: %llu", - share->table_name, stat.commit_count, stat.row_count)); + share->key, stat.commit_count, stat.row_count)); } else { DBUG_PRINT("ndb_util_thread", ("Error: Could not get commit count for table %s", - share->table_name)); + share->key)); stat.commit_count= 0; } @@ -6107,7 +7625,7 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) pthread_mutex_unlock(&share->mutex); /* Decrease the use count and possibly free share */ - free_share(share); + free_share(&share); } /* Clear the list of open tables */ @@ -6134,7 +7652,9 @@ pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) abstime.tv_nsec-= 1000000000; } } - +ndb_util_thread_end: + sql_print_information("Stopping Cluster Utility thread"); + net_end(&thd->net); thd->cleanup(); delete thd; delete ndb; @@ -6526,9 +8046,9 @@ void ndb_serialize_cond(const Item *item, void *arg) } else { - DBUG_PRINT("info", ("Was not expecting field from table %s(%s)", - context->table->s->table_name, - field->table->s->table_name)); + DBUG_PRINT("info", ("Was not expecting field from table %s (%s)", + context->table->s->table_name.str, + field->table->s->table_name.str)); context->supported= FALSE; } break; @@ -7122,6 +8642,8 @@ ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, : NULL; break; default: + field= NULL; //Keep compiler happy + DBUG_ASSERT(0); break; } switch ((negated) ? @@ -7469,32 +8991,81 @@ ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack, DBUG_RETURN(0); } -int -ndbcluster_show_status(THD* thd) +/* + get table space info for SHOW CREATE TABLE +*/ +char* ha_ndbcluster::get_tablespace_name(THD *thd) { - Protocol *protocol= thd->protocol; - + Ndb *ndb= check_ndb_in_thd(thd); + NDBDICT *ndbdict= ndb->getDictionary(); + NdbError ndberr; + Uint32 id; + ndb->setDatabaseName(m_dbname); + const NDBTAB *ndbtab= ndbdict->getTable(m_tabname); + if (ndbtab == 0) + { + ndberr= ndbdict->getNdbError(); + goto err; + } + if (!ndbtab->getTablespace(&id)) + { + return 0; + } + { + NdbDictionary::Tablespace ts= ndbdict->getTablespace(id); + ndberr= ndbdict->getNdbError(); + if(ndberr.classification != ndberror_cl_none) + goto err; + return (my_strdup(ts.getName(), MYF(0))); + } +err: + if (ndberr.status == NdbError::TemporaryError) + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), + ndberr.code, ndberr.message, "NDB"); + else + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndberr.code, ndberr.message, "NDB"); + return 0; +} + +/* + Implements the SHOW NDB STATUS command. 
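The utility-thread loop above samples share->commit_count_lock under share->mutex before the slow ndb_get_table_statistics() round trip and writes commit_count back afterwards, which reads like a generation counter that lets the thread discard a result made stale by a concurrent writer. A sketch of that protocol under that assumption, with std::mutex standing in for the server's pthread primitives:

#include <cstdint>
#include <mutex>

// Sketch of the refresh protocol suggested above: commit_count_lock acts
// as a generation counter that writers bump, so a slow statistics fetch
// can detect that its result has been overtaken.
struct ShareStats {
  std::mutex mutex;
  uint64_t   commit_count= 0;
  uint32_t   commit_count_lock= 0;  // bumped whenever commit_count changes
};

template <class FetchFn>
void refresh_commit_count(ShareStats &s, FetchFn fetch_from_cluster)
{
  uint32_t generation;
  {
    std::lock_guard<std::mutex> g(s.mutex);
    generation= s.commit_count_lock;    // sample before the slow call
  }

  uint64_t fresh= fetch_from_cluster(); // e.g. ndb_get_table_statistics()

  std::lock_guard<std::mutex> g(s.mutex);
  if (generation == s.commit_count_lock) // no writer interfered meanwhile
    s.commit_count= fresh;               // safe to install the new value
}

Writers would increment commit_count_lock under the same mutex whenever they change commit_count themselves.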
+*/ +bool +ndbcluster_show_status(THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) +{ + char buf[IO_SIZE]; + uint buflen; DBUG_ENTER("ndbcluster_show_status"); if (have_ndbcluster != SHOW_OPTION_YES) { - my_message(ER_NOT_SUPPORTED_YET, - "Cannot call SHOW NDBCLUSTER STATUS because skip-ndbcluster is " - "defined", - MYF(0)); - DBUG_RETURN(TRUE); + DBUG_RETURN(FALSE); } - - List<Item> field_list; - field_list.push_back(new Item_empty_string("free_list", 255)); - field_list.push_back(new Item_return_int("created", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("free", 10,MYSQL_TYPE_LONG)); - field_list.push_back(new Item_return_int("sizeof", 10,MYSQL_TYPE_LONG)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + if (stat_type != HA_ENGINE_STATUS) + { + DBUG_RETURN(FALSE); + } + + update_status_variables(g_ndb_cluster_connection); + buflen= + my_snprintf(buf, sizeof(buf), + "cluster_node_id=%u, " + "connected_host=%s, " + "connected_port=%u, " + "number_of_storage_nodes=%u", + ndb_cluster_node_id, + ndb_connected_host, + ndb_connected_port, + ndb_number_of_storage_nodes); + if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name), + "connection", strlen("connection"), + buf, buflen)) DBUG_RETURN(TRUE); - + if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) { Ndb* ndb= (get_thd_ndb(thd))->ndb; @@ -7502,19 +9073,817 @@ ndbcluster_show_status(THD* thd) tmp.m_name= 0; while (ndb->get_free_list_usage(&tmp)) { - protocol->prepare_for_resend(); - - protocol->store(tmp.m_name, &my_charset_bin); - protocol->store((uint)tmp.m_created); - protocol->store((uint)tmp.m_free); - protocol->store((uint)tmp.m_sizeof); - if (protocol->write()) - DBUG_RETURN(TRUE); + buflen= + my_snprintf(buf, sizeof(buf), + "created=%u, free=%u, sizeof=%u", + tmp.m_created, tmp.m_free, tmp.m_sizeof); + if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name), + tmp.m_name, strlen(tmp.m_name), buf, buflen)) + DBUG_RETURN(TRUE); + } + } +#ifdef HAVE_NDB_BINLOG + ndbcluster_show_status_binlog(thd, stat_print, stat_type); +#endif + + DBUG_RETURN(FALSE); +} + + +/* + Create a table in NDB Cluster + */ +static uint get_no_fragments(ulonglong max_rows) +{ +#if MYSQL_VERSION_ID >= 50000 + uint acc_row_size= 25 + /*safety margin*/ 2; +#else + uint acc_row_size= pk_length*4; + /* add acc overhead */ + if (pk_length <= 8) /* main page will set the limit */ + acc_row_size+= 25 + /*safety margin*/ 2; + else /* overflow page will set the limit */ + acc_row_size+= 4 + /*safety margin*/ 4; +#endif + ulonglong acc_fragment_size= 512*1024*1024; +#if MYSQL_VERSION_ID >= 50100 + return (max_rows*acc_row_size)/acc_fragment_size+1; +#else + return ((max_rows*acc_row_size)/acc_fragment_size+1 + +1/*correct rounding*/)/2; +#endif +} + + +/* + Routine to adjust default number of partitions to always be a multiple + of number of nodes and never more than 4 times the number of nodes. 
+ +*/ +static bool adjusted_frag_count(uint no_fragments, uint no_nodes, + uint &reported_frags) +{ + uint i= 0; + reported_frags= no_nodes; + while (reported_frags < no_fragments && ++i < 4 && + (reported_frags + no_nodes) < MAX_PARTITIONS) + reported_frags+= no_nodes; + return (reported_frags < no_fragments); +} + +int ha_ndbcluster::get_default_no_partitions(ulonglong max_rows) +{ + uint reported_frags; + uint no_fragments= get_no_fragments(max_rows); + uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); + if (adjusted_frag_count(no_fragments, no_nodes, reported_frags)) + { + push_warning(current_thd, + MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Ndb might have problems storing the max amount of rows specified"); + } + return (int)reported_frags; +} + + +/* + Set-up auto-partitioning for NDB Cluster + + SYNOPSIS + set_auto_partitions() + part_info Partition info struct to set-up + + RETURN VALUE + NONE + + DESCRIPTION + Set-up auto partitioning scheme for tables that didn't define any + partitioning. We'll use PARTITION BY KEY() in this case which + translates into partition by primary key if a primary key exists + and partition by hidden key otherwise. +*/ + +void ha_ndbcluster::set_auto_partitions(partition_info *part_info) +{ + DBUG_ENTER("ha_ndbcluster::set_auto_partitions"); + part_info->list_of_part_fields= TRUE; + part_info->part_type= HASH_PARTITION; + switch (opt_ndb_distribution_id) + { + case ND_KEYHASH: + part_info->linear_hash_ind= FALSE; + break; + case ND_LINHASH: + part_info->linear_hash_ind= TRUE; + break; + } + DBUG_VOID_RETURN; +} + + +int ha_ndbcluster::set_range_data(void *tab_ref, partition_info *part_info) +{ + NDBTAB *tab= (NDBTAB*)tab_ref; + int32 *range_data= (int32*)my_malloc(part_info->no_parts*sizeof(int32), + MYF(0)); + uint i; + int error= 0; + DBUG_ENTER("set_range_data"); + + if (!range_data) + { + mem_alloc_error(part_info->no_parts*sizeof(int32)); + DBUG_RETURN(1); + } + for (i= 0; i < part_info->no_parts; i++) + { + longlong range_val= part_info->range_int_array[i]; + if (range_val < INT_MIN32 || range_val >= INT_MAX32) + { + if ((i != part_info->no_parts - 1) || + (range_val != LONGLONG_MAX)) + { + my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); + error= 1; + goto error; + } + range_val= INT_MAX32; + } + range_data[i]= (int32)range_val; + } + tab->setRangeListData(range_data, sizeof(int32)*part_info->no_parts); +error: + my_free((char*)range_data, MYF(0)); + DBUG_RETURN(error); +} + +int ha_ndbcluster::set_list_data(void *tab_ref, partition_info *part_info) +{ + NDBTAB *tab= (NDBTAB*)tab_ref; + int32 *list_data= (int32*)my_malloc(part_info->no_list_values * 2 + * sizeof(int32), MYF(0)); + uint32 *part_id, i; + int error= 0; + DBUG_ENTER("set_list_data"); + + if (!list_data) + { + mem_alloc_error(part_info->no_list_values*2*sizeof(int32)); + DBUG_RETURN(1); + } + for (i= 0; i < part_info->no_list_values; i++) + { + LIST_PART_ENTRY *list_entry= &part_info->list_array[i]; + longlong list_val= list_entry->list_value; + if (list_val < INT_MIN32 || list_val > INT_MAX32) + { + my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); + error= 1; + goto error; } + list_data[2*i]= (int32)list_val; + part_id= (uint32*)&list_data[2*i+1]; + *part_id= list_entry->partition_id; + } + tab->setRangeListData(list_data, 2*sizeof(int32)*part_info->no_list_values); +error: + my_free((char*)list_data, MYF(0)); + DBUG_RETURN(error); +} + +/* + User defined partitioning set-up. 
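Taken together, get_no_fragments() and adjusted_frag_count() derive the default partition count from MAX_ROWS: roughly 27 bytes of ACC data per row, at most 512 MB per fragment, rounded up to a multiple of the data node count and capped at four times that count. A standalone sketch of the MYSQL_VERSION_ID >= 50100 branch of that arithmetic, with the MAX_PARTITIONS guard and the warning path omitted for brevity:

#include <cstdint>
#include <cstdio>

// Mirrors the 5.1 branch of get_no_fragments() shown above.
static unsigned no_fragments(uint64_t max_rows)
{
  const uint64_t acc_row_size= 25 + 2;            // bytes + safety margin
  const uint64_t acc_fragment_size= 512ULL * 1024 * 1024;
  return (unsigned)((max_rows * acc_row_size) / acc_fragment_size + 1);
}

// Mirrors adjusted_frag_count(): a multiple of the node count,
// capped at 4 * db_nodes.
static unsigned default_partitions(unsigned fragments, unsigned db_nodes)
{
  unsigned reported= db_nodes;
  for (unsigned i= 1; reported < fragments && i < 4; i++)
    reported+= db_nodes;
  return reported;
}

int main()
{
  // 100M rows: 2.7 GB of ACC data / 512 MB per fragment -> 6 fragments,
  // rounded up to 8 partitions on a 4-node cluster.
  unsigned frags= no_fragments(100000000ULL);
  std::printf("fragments=%u partitions=%u\n",
              frags, default_partitions(frags, 4));
  return 0;
}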
We need to check how many fragments the + user wants defined and which node groups to put those into. Later we also + want to attach those partitions to a tablespace. + + All the functionality of the partition function, partition limits and so + forth are entirely handled by the MySQL Server. There is one exception to + this rule for PARTITION BY KEY where NDB handles the hash function and + this type can thus be handled transparently also by NDB API program. + For RANGE, HASH and LIST and subpartitioning the NDB API programs must + implement the function to map to a partition. +*/ + +uint ha_ndbcluster::set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab_par) +{ + uint16 frag_data[MAX_PARTITIONS]; + char *ts_names[MAX_PARTITIONS]; + ulong ts_index= 0, fd_index= 0, i, j; + NDBTAB *tab= (NDBTAB*)tab_par; + NDBTAB::FragmentType ftype= NDBTAB::UserDefined; + partition_element *part_elem; + bool first= TRUE; + uint ts_id, ts_version, part_count= 0, tot_ts_name_len; + List_iterator<partition_element> part_it(part_info->partitions); + int error; + char *name_ptr; + DBUG_ENTER("ha_ndbcluster::set_up_partition_info"); + + if (part_info->part_type == HASH_PARTITION && + part_info->list_of_part_fields == TRUE) + { + Field **fields= part_info->part_field_array; + + if (part_info->linear_hash_ind) + ftype= NDBTAB::DistrKeyLin; + else + ftype= NDBTAB::DistrKeyHash; + + for (i= 0; i < part_info->part_field_list.elements; i++) + { + NDBCOL *col= tab->getColumn(fields[i]->fieldnr - 1); + DBUG_PRINT("info",("setting dist key on %s", col->getName())); + col->setPartitionKey(TRUE); + } + } + else + { + /* + Create a shadow field for those tables that have user defined + partitioning. This field stores the value of the partition + function such that NDB can handle reorganisations of the data + even when the MySQL Server isn't available to assist with + calculation of the partition function value. 
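For RANGE partitioning, the mapping an NDB API program must implement is fixed by the int32 list that set_range_data() packs above: one exclusive upper bound per partition (VALUES LESS THAN, with a trailing MAXVALUE clamped to INT_MAX32), so looking up a partition function value means finding the first bound strictly greater than it. A sketch of that lookup; it illustrates the packed format only and is not the server's own code path:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// bounds holds one exclusive upper bound per partition, sorted ascending,
// as packed by set_range_data(). Returns the partition id, or -1 if the
// value lies above every bound.
static int range_partition_id(const std::vector<int32_t> &bounds,
                              int32_t part_func_value)
{
  std::vector<int32_t>::const_iterator it=
      std::upper_bound(bounds.begin(), bounds.end(), part_func_value);
  if (it == bounds.end())
    return -1;
  return (int)(it - bounds.begin());
}

int main()
{
  // PARTITION BY RANGE with VALUES LESS THAN (10), (100), MAXVALUE
  std::vector<int32_t> bounds{10, 100, INT32_MAX};
  assert(range_partition_id(bounds, 9)   == 0);
  assert(range_partition_id(bounds, 10)  == 1);  // bounds are exclusive
  assert(range_partition_id(bounds, 150) == 2);
  return 0;
}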
+ */ + NDBCOL col; + DBUG_PRINT("info", ("Generating partition func value field")); + col.setName("$PART_FUNC_VALUE"); + col.setType(NdbDictionary::Column::Int); + col.setLength(1); + col.setNullable(FALSE); + col.setPrimaryKey(FALSE); + col.setAutoIncrement(FALSE); + tab->addColumn(col); + if (part_info->part_type == RANGE_PARTITION) + { + if ((error= set_range_data((void*)tab, part_info))) + { + DBUG_RETURN(error); + } + } + else if (part_info->part_type == LIST_PARTITION) + { + if ((error= set_list_data((void*)tab, part_info))) + { + DBUG_RETURN(error); + } + } + } + tab->setFragmentType(ftype); + i= 0; + tot_ts_name_len= 0; + do + { + uint ng; + part_elem= part_it++; + if (!part_info->is_sub_partitioned()) + { + ng= part_elem->nodegroup_id; + if (first && ng == UNDEF_NODEGROUP) + ng= 0; + ts_names[fd_index]= part_elem->tablespace_name; + frag_data[fd_index++]= ng; + } + else + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + j= 0; + do + { + part_elem= sub_it++; + ng= part_elem->nodegroup_id; + if (first && ng == UNDEF_NODEGROUP) + ng= 0; + ts_names[fd_index]= part_elem->tablespace_name; + frag_data[fd_index++]= ng; + } while (++j < part_info->no_subparts); + } + first= FALSE; + } while (++i < part_info->no_parts); + tab->setDefaultNoPartitionsFlag(part_info->use_default_no_partitions); + tab->setLinearFlag(part_info->linear_hash_ind); + tab->setMaxRows(table->s->max_rows); + tab->setTablespaceNames(ts_names, fd_index*sizeof(char*)); + tab->setFragmentCount(fd_index); + tab->setFragmentData(&frag_data, fd_index*2); + DBUG_RETURN(0); +} + + +bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + DBUG_ENTER("ha_ndbcluster::check_if_incompatible_data"); + uint i; + const NDBTAB *tab= (const NDBTAB *) m_table; + + for (i= 0; i < table->s->fields; i++) + { + Field *field= table->field[i]; + const NDBCOL *col= tab->getColumn(field->field_name); + if (field->add_index && + col->getStorageType() == NdbDictionary::Column::StorageTypeDisk) + { + DBUG_PRINT("info", ("add/drop index not supported for disk stored column")); + DBUG_RETURN(COMPATIBLE_DATA_NO); + } + } + if (table_changes != IS_EQUAL_YES) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + /* Check that auto_increment value was not changed */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + info->auto_increment_value != 0) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + /* Check that row format didn't change */ + if ((info->used_fields & HA_CREATE_USED_AUTO) && + get_row_type() != info->row_type) + DBUG_RETURN(COMPATIBLE_DATA_NO); + + DBUG_RETURN(COMPATIBLE_DATA_YES); +} + +bool set_up_tablespace(st_alter_tablespace *info, + NdbDictionary::Tablespace *ndb_ts) +{ + ndb_ts->setName(info->tablespace_name); + ndb_ts->setExtentSize(info->extent_size); + ndb_ts->setDefaultLogfileGroup(info->logfile_group_name); + return false; +} + +bool set_up_datafile(st_alter_tablespace *info, + NdbDictionary::Datafile *ndb_df) +{ + if (info->max_size > 0) + { + my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0)); + return true; + } + ndb_df->setPath(info->data_file_name); + ndb_df->setSize(info->initial_size); + ndb_df->setTablespace(info->tablespace_name); + return false; +} + +bool set_up_logfile_group(st_alter_tablespace *info, + NdbDictionary::LogfileGroup *ndb_lg) +{ + ndb_lg->setName(info->logfile_group_name); + ndb_lg->setUndoBufferSize(info->undo_buffer_size); + return false; +} + +bool set_up_undofile(st_alter_tablespace *info, + NdbDictionary::Undofile *ndb_uf) +{ + 
ndb_uf->setPath(info->undo_file_name); + ndb_uf->setSize(info->initial_size); + ndb_uf->setLogfileGroup(info->logfile_group_name); + return false; +} + +int ndbcluster_alter_tablespace(THD* thd, st_alter_tablespace *info) +{ + DBUG_ENTER("ha_ndbcluster::alter_tablespace"); + + int is_tablespace= 0; + Ndb *ndb= check_ndb_in_thd(thd); + if (ndb == NULL) + { + DBUG_RETURN(HA_ERR_NO_CONNECTION); } - send_eof(thd); + NDBDICT *dict = ndb->getDictionary(); + int error; + const char * errmsg; + + switch (info->ts_cmd_type){ + case (CREATE_TABLESPACE): + { + error= ER_CREATE_FILEGROUP_FAILED; + + NdbDictionary::Tablespace ndb_ts; + NdbDictionary::Datafile ndb_df; + if (set_up_tablespace(info, &ndb_ts)) + { + DBUG_RETURN(1); + } + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + errmsg= "TABLESPACE"; + if (dict->createTablespace(ndb_ts)) + { + DBUG_PRINT("error", ("createTablespace returned %d", error)); + goto ndberror; + } + DBUG_PRINT("info", ("Successfully created Tablespace")); + errmsg= "DATAFILE"; + if (dict->createDatafile(ndb_df)) + { + DBUG_PRINT("error", ("createDatafile returned %d", error)); + goto ndberror; + } + is_tablespace= 1; + break; + } + case (ALTER_TABLESPACE): + { + error= ER_ALTER_FILEGROUP_FAILED; + if (info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) + { + NdbDictionary::Datafile ndb_df; + if (set_up_datafile(info, &ndb_df)) + { + DBUG_RETURN(1); + } + errmsg= " CREATE DATAFILE"; + if (dict->createDatafile(ndb_df)) + { + goto ndberror; + } + } + else if(info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) + { + NdbDictionary::Datafile df = dict->getDatafile(0, + info->data_file_name); + if (strcmp(df.getPath(), info->data_file_name) == 0) + { + errmsg= " DROP DATAFILE"; + if (dict->dropDatafile(df)) + { + goto ndberror; + } + } + else + { + DBUG_PRINT("error", ("No such datafile")); + my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE"); + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("error", ("Unsupported alter tablespace: %d", + info->ts_alter_tablespace_type)); + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + is_tablespace= 1; + break; + } + case (CREATE_LOGFILE_GROUP): + { + error= ER_CREATE_FILEGROUP_FAILED; + NdbDictionary::LogfileGroup ndb_lg; + NdbDictionary::Undofile ndb_uf; + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + if (set_up_logfile_group(info, &ndb_lg)) + { + DBUG_RETURN(1); + } + errmsg= "LOGFILE GROUP"; + if (dict->createLogfileGroup(ndb_lg)) + { + goto ndberror; + } + DBUG_PRINT("info", ("Successfully created Logfile Group")); + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + errmsg= "UNDOFILE"; + if (dict->createUndofile(ndb_uf)) + { + goto ndberror; + } + break; + } + case (ALTER_LOGFILE_GROUP): + { + error= ER_ALTER_FILEGROUP_FAILED; + if (info->undo_file_name == NULL) + { + /* + REDO files in LOGFILE GROUP not supported yet + */ + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + NdbDictionary::Undofile ndb_uf; + if (set_up_undofile(info, &ndb_uf)) + { + DBUG_RETURN(1); + } + errmsg= "CREATE UNDOFILE"; + if (dict->createUndofile(ndb_uf)) + { + goto ndberror; + } + break; + } + case (DROP_TABLESPACE): + { + error= ER_DROP_FILEGROUP_FAILED; + errmsg= "TABLESPACE"; + if (dict->dropTablespace(dict->getTablespace(info->tablespace_name))) + { + goto ndberror; + } + is_tablespace= 1; + break; + } + case (DROP_LOGFILE_GROUP): + { + error= ER_DROP_FILEGROUP_FAILED; + errmsg= "LOGFILE GROUP"; + if 
(dict->dropLogfileGroup(dict->getLogfileGroup(info->logfile_group_name))) + { + goto ndberror; + } + break; + } + case (CHANGE_FILE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + case (ALTER_ACCESS_MODE_TABLESPACE): + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + default: + { + DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); + } + } +#ifdef HAVE_NDB_BINLOG + if (is_tablespace) + ndbcluster_log_schema_op(thd, 0, + thd->query, thd->query_length, + "", info->tablespace_name, + 0, 0, + SOT_TABLESPACE); + else + ndbcluster_log_schema_op(thd, 0, + thd->query, thd->query_length, + "", info->logfile_group_name, + 0, 0, + SOT_LOGFILE_GROUP); +#endif DBUG_RETURN(FALSE); + +ndberror: + const NdbError err= dict->getNdbError(); + ERR_PRINT(err); + ndb_to_mysql_error(&err); + + my_error(error, MYF(0), errmsg); + DBUG_RETURN(1); +} + + +bool ha_ndbcluster::get_no_parts(const char *name, uint *no_parts) +{ + Ndb *ndb; + NDBDICT *dict; + const NDBTAB *tab; + int err; + DBUG_ENTER("ha_ndbcluster::get_no_parts"); + + set_dbname(name); + set_tabname(name); + do + { + if (check_ndb_connection()) + { + err= HA_ERR_NO_CONNECTION; + break; + } + ndb= get_ndb(); + ndb->setDatabaseName(m_dbname); + dict= ndb->getDictionary(); + if (!(tab= dict->getTable(m_tabname))) + ERR_BREAK(dict->getNdbError(), err); + // Check if thread has stale local cache + if (tab->getObjectStatus() == NdbDictionary::Object::Invalid) + { + invalidate_dictionary_cache(FALSE); + if (!(tab= dict->getTable(m_tabname))) + ERR_BREAK(dict->getNdbError(), err); + } + *no_parts= tab->getFragmentCount(); + DBUG_RETURN(FALSE); + } while (1); + +end: + print_error(err, MYF(0)); + DBUG_RETURN(TRUE); } -#endif /* HAVE_NDBCLUSTER_DB */ +static int ndbcluster_fill_files_table(THD *thd, TABLE_LIST *tables, COND *cond) +{ + TABLE* table= tables->table; + Ndb *ndb= check_ndb_in_thd(thd); + NdbDictionary::Dictionary* dict= ndb->getDictionary(); + NdbDictionary::Dictionary::List dflist; + NdbError ndberr; + unsigned i; + + DBUG_ENTER("ndbcluster_fill_files_table"); + + dict->listObjects(dflist, NdbDictionary::Object::Datafile); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + ERR_RETURN(ndberr); + + for (i= 0; i < dflist.count; i++) + { + NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i]; + Ndb_cluster_connection_node_iter iter; + unsigned id; + + g_ndb_cluster_connection->init_get_next_node(iter); + + while ((id= g_ndb_cluster_connection->get_next_node(iter))) + { + NdbDictionary::Datafile df= dict->getDatafile(id, elt.name); + ndberr= dict->getNdbError(); + if(ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace()); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + + int c= 0; + table->field[c++]->set_null(); // FILE_ID + table->field[c++]->store(elt.name, strlen(elt.name), + system_charset_info); + table->field[c++]->store("DATAFILE",8,system_charset_info); + table->field[c++]->store(df.getTablespace(), strlen(df.getTablespace()), + system_charset_info); + table->field[c++]->set_null(); // TABLE_CATALOG + table->field[c++]->set_null(); // TABLE_SCHEMA + table->field[c++]->set_null(); // TABLE_NAME + + // LOGFILE_GROUP_NAME + table->field[c++]->store(ts.getDefaultLogfileGroup(), + strlen(ts.getDefaultLogfileGroup()), 
+ system_charset_info); + table->field[c++]->set_null(); // LOGFILE_GROUP_NUMBER + table->field[c++]->store(ndbcluster_hton.name, + strlen(ndbcluster_hton.name), + system_charset_info); // ENGINE + + table->field[c++]->set_null(); // FULLTEXT_KEYS + table->field[c++]->set_null(); // DELETED_ROWS + table->field[c++]->set_null(); // UPDATE_COUNT + table->field[c++]->store(df.getFree() / ts.getExtentSize()); // FREE_EXTENTS + table->field[c++]->store(df.getSize() / ts.getExtentSize()); // TOTAL_EXTENTS + table->field[c++]->store(ts.getExtentSize()); // EXTENT_SIZE + + table->field[c++]->store(df.getSize()); // INITIAL_SIZE + table->field[c++]->store(df.getSize()); // MAXIMUM_SIZE + table->field[c++]->set_null(); // AUTOEXTEND_SIZE + + table->field[c++]->set_null(); // CREATION_TIME + table->field[c++]->set_null(); // LAST_UPDATE_TIME + table->field[c++]->set_null(); // LAST_ACCESS_TIME + table->field[c++]->set_null(); // RECOVER_TIME + table->field[c++]->set_null(); // TRANSACTION_COUNTER + + table->field[c++]->store(df.getObjectVersion()); // VERSION + + table->field[c++]->store("FIXED", 5, system_charset_info); // ROW_FORMAT + + table->field[c++]->set_null(); // TABLE_ROWS + table->field[c++]->set_null(); // AVG_ROW_LENGTH + table->field[c++]->set_null(); // DATA_LENGTH + table->field[c++]->set_null(); // MAX_DATA_LENGTH + table->field[c++]->set_null(); // INDEX_LENGTH + table->field[c++]->set_null(); // DATA_FREE + table->field[c++]->set_null(); // CREATE_TIME + table->field[c++]->set_null(); // UPDATE_TIME + table->field[c++]->set_null(); // CHECK_TIME + table->field[c++]->set_null(); // CHECKSUM + + table->field[c++]->store("NORMAL", 6, system_charset_info); + + char extra[30]; + int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id); + table->field[c]->store(extra, len, system_charset_info); + schema_table_store_record(thd, table); + } + } + + dict->listObjects(dflist, NdbDictionary::Object::Undofile); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + ERR_RETURN(ndberr); + + for (i= 0; i < dflist.count; i++) + { + NdbDictionary::Dictionary::List::Element& elt= dflist.elements[i]; + Ndb_cluster_connection_node_iter iter; + unsigned id; + + g_ndb_cluster_connection->init_get_next_node(iter); + + while ((id= g_ndb_cluster_connection->get_next_node(iter))) + { + NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + NdbDictionary::LogfileGroup lfg= + dict->getLogfileGroup(uf.getLogfileGroup()); + ndberr= dict->getNdbError(); + if (ndberr.classification != NdbError::NoError) + { + if (ndberr.classification == NdbError::SchemaError) + continue; + ERR_RETURN(ndberr); + } + + int c= 0; + table->field[c++]->set_null(); // FILE_ID + table->field[c++]->store(elt.name, strlen(elt.name), + system_charset_info); + table->field[c++]->store("UNDO LOG", 8, system_charset_info); + table->field[c++]->set_null(); // TABLESPACE NAME + table->field[c++]->set_null(); // TABLE_CATALOG + table->field[c++]->set_null(); // TABLE_SCHEMA + table->field[c++]->set_null(); // TABLE_NAME + + // LOGFILE_GROUP_NAME + table->field[c++]->store(uf.getLogfileGroup(), + strlen(uf.getLogfileGroup()), + system_charset_info); + table->field[c++]->store(uf.getLogfileGroupId()); // LOGFILE_GROUP_NUMBER + table->field[c++]->store(ndbcluster_hton.name, + strlen(ndbcluster_hton.name), + system_charset_info); 
// ENGINE + + table->field[c++]->set_null(); // FULLTEXT_KEYS + table->field[c++]->set_null(); // DELETED_ROWS + table->field[c++]->set_null(); // UPDATE_COUNT + table->field[c++]->store(lfg.getUndoFreeWords()); // FREE_EXTENTS + table->field[c++]->store(lfg.getUndoBufferSize()); // TOTAL_EXTENTS + table->field[c++]->store(4); // EXTENT_SIZE + + table->field[c++]->store(uf.getSize()); // INITIAL_SIZE + table->field[c++]->store(uf.getSize()); // MAXIMUM_SIZE + table->field[c++]->set_null(); // AUTOEXTEND_SIZE + + table->field[c++]->set_null(); // CREATION_TIME + table->field[c++]->set_null(); // LAST_UPDATE_TIME + table->field[c++]->set_null(); // LAST_ACCESS_TIME + table->field[c++]->set_null(); // RECOVER_TIME + table->field[c++]->set_null(); // TRANSACTION_COUNTER + + table->field[c++]->store(uf.getObjectVersion()); // VERSION + + table->field[c++]->set_null(); // ROW FORMAT + + table->field[c++]->set_null(); // TABLE_ROWS + table->field[c++]->set_null(); // AVG_ROW_LENGTH + table->field[c++]->set_null(); // DATA_LENGTH + table->field[c++]->set_null(); // MAX_DATA_LENGTH + table->field[c++]->set_null(); // INDEX_LENGTH + table->field[c++]->set_null(); // DATA_FREE + table->field[c++]->set_null(); // CREATE_TIME + table->field[c++]->set_null(); // UPDATE_TIME + table->field[c++]->set_null(); // CHECK_TIME + table->field[c++]->set_null(); // CHECKSUM + + table->field[c++]->store("NORMAL", 6, system_charset_info); + + char extra[30]; + int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u",id); + table->field[c]->store(extra, len, system_charset_info); + schema_table_store_record(thd, table); + } + } + DBUG_RETURN(0); +} diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h index a44604b05b0..7498d2bb624 100644 --- a/sql/ha_ndbcluster.h +++ b/sql/ha_ndbcluster.h @@ -25,6 +25,10 @@ #pragma interface /* gcc class implementation */ #endif +/* Blob tables and events are internal to NDB and must never be accessed */ +#define IS_NDB_BLOB_PREFIX(A) is_prefix(A, "NDB$BLOB") + +#include <NdbApi.hpp> #include <ndbapi_limits.h> #define NDB_HIDDEN_PRIMARY_KEY_LENGTH 8 @@ -37,10 +41,16 @@ class NdbScanOperation; class NdbScanFilter; class NdbIndexScanOperation; class NdbBlob; +class NdbIndexStat; +class NdbEventOperation; // connectstring to cluster if given by mysqld extern const char *ndbcluster_connectstring; extern ulong ndb_cache_check_time; +#ifdef HAVE_NDB_BINLOG +extern ulong ndb_report_thresh_binlog_epoch_slip; +extern ulong ndb_report_thresh_binlog_mem_usage; +#endif typedef enum ndb_index_type { UNDEFINED_INDEX = 0, @@ -51,22 +61,92 @@ typedef enum ndb_index_type { ORDERED_INDEX = 5 } NDB_INDEX_TYPE; +typedef enum ndb_index_status { + UNDEFINED = 0, + ACTIVE = 1, + TO_BE_DROPPED = 2 +} NDB_INDEX_STATUS; + typedef struct ndb_index_data { NDB_INDEX_TYPE type; + NDB_INDEX_STATUS status; void *index; void *unique_index; unsigned char *unique_index_attrid_map; + // In this version stats are not shared between threads + NdbIndexStat* index_stat; + uint index_stat_cache_entries; + // Simple counter mechanism to decide when to connect to db + uint index_stat_update_freq; + uint index_stat_query_count; } NDB_INDEX_DATA; +typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; + +int get_ndb_blobs_value(TABLE* table, NdbValue* value_array, + byte*& buffer, uint& buffer_size, + my_ptrdiff_t ptrdiff); + +typedef enum { + NSS_INITIAL= 0, + NSS_DROPPED, + NSS_ALTERED +} NDB_SHARE_STATE; + typedef struct st_ndbcluster_share { + NDB_SHARE_STATE state; + MEM_ROOT mem_root; THR_LOCK 
lock; pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; + char *key; + uint key_length; + THD *util_lock; + uint use_count; uint commit_count_lock; ulonglong commit_count; + char *db; + char *table_name; +#ifdef HAVE_NDB_BINLOG + uint32 flags; + NdbEventOperation *op; + NdbEventOperation *op_old; // for rename table + char *old_names; // for rename table + TABLE_SHARE *table_share; + TABLE *table; + NdbValue *ndb_value[2]; + MY_BITMAP *subscriber_bitmap; + MY_BITMAP slock_bitmap; + uint32 slock[256/32]; // 256 bits for lock status of table +#endif } NDB_SHARE; +inline +NDB_SHARE_STATE +get_ndb_share_state(NDB_SHARE *share) +{ + NDB_SHARE_STATE state; + pthread_mutex_lock(&share->mutex); + state= share->state; + pthread_mutex_unlock(&share->mutex); + return state; +} + +inline +void +set_ndb_share_state(NDB_SHARE *share, NDB_SHARE_STATE state) +{ + pthread_mutex_lock(&share->mutex); + share->state= state; + pthread_mutex_unlock(&share->mutex); +} + +#ifdef HAVE_NDB_BINLOG +/* NDB_SHARE.flags */ +#define NSF_HIDDEN_PK 1 /* table has hidden primary key */ +#define NSF_BLOB_FLAG 2 /* table has blob attributes */ +#define NSF_NO_BINLOG 4 /* table should not be binlogged */ +#endif + typedef enum ndb_item_type { NDB_VALUE = 0, // Qualified more with Item::Type NDB_FIELD = 1, // Qualified from table definition @@ -115,6 +195,7 @@ struct negated_function_mapping NDB_FUNC_TYPE neg_fun; }; + /* Define what functions can be negated in condition pushdown. Note, these HAVE to be in the same order as in definition enum @@ -439,6 +520,11 @@ class Ndb_cond_traverse_context Place holder for ha_ndbcluster thread specific data */ +enum THD_NDB_OPTIONS +{ + TNO_NO_LOG_SCHEMA_OP= 1 << 0 +}; + class Thd_ndb { public: @@ -450,13 +536,14 @@ class Thd_ndb NdbTransaction *all; NdbTransaction *stmt; int error; + uint32 options; List<NDB_SHARE> changed_tables; }; class ha_ndbcluster: public handler { public: - ha_ndbcluster(TABLE *table); + ha_ndbcluster(TABLE_SHARE *table); ~ha_ndbcluster(); int open(const char *name, int mode, uint test_if_locked); @@ -465,7 +552,7 @@ class ha_ndbcluster: public handler int write_row(byte *buf); int update_row(const byte *old_data, byte *new_data); int delete_row(const byte *buf); - int index_init(uint index); + int index_init(uint index, bool sorted); int index_end(); int index_read(byte *buf, const byte *key, uint key_len, enum ha_rkey_function find_flag); @@ -489,6 +576,7 @@ class ha_ndbcluster: public handler bool eq_range, bool sorted, byte* buf); int read_range_next(); + int alter_tablespace(st_alter_tablespace *info); /** * Multi range stuff @@ -500,13 +588,19 @@ class ha_ndbcluster: public handler bool get_error_message(int error, String *buf); void info(uint); + void get_dynamic_partition_info(PARTITION_INFO *stat_info, uint part_id); int extra(enum ha_extra_function operation); int extra_opt(enum ha_extra_function operation, ulong cache_size); int external_lock(THD *thd, int lock_type); int start_stmt(THD *thd, thr_lock_type lock_type); + void print_error(int error, myf errflag); const char * table_type() const; const char ** bas_ext() const; ulong table_flags(void) const; + int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys); + int prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys); + int final_drop_index(TABLE *table_arg); + void set_part_info(partition_info *part_info); ulong index_flags(uint idx, uint part, bool all_parts) const; uint max_supported_record_length() const; uint max_supported_keys() const; @@ 
-517,12 +611,20 @@ class ha_ndbcluster: public handler int rename_table(const char *from, const char *to); int delete_table(const char *name); int create(const char *name, TABLE *form, HA_CREATE_INFO *info); + int create_handler_files(const char *file); + int get_default_no_partitions(ulonglong max_rows); + bool get_no_parts(const char *name, uint *no_parts); + void set_auto_partitions(partition_info *part_info); + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); bool low_byte_first() const; bool has_transactions(); + + virtual bool is_injective() const { return true; } + const char* index_type(uint key_number); double scan_time(); @@ -580,30 +682,67 @@ static void set_tabname(const char *pathname, char *tabname); uint key_length, qc_engine_callback *engine_callback, ulonglong *engine_data); + + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes); + static int invalidate_dictionary_cache(TABLE_SHARE *share, Ndb *ndb, + const char *dbname, const char *tabname, + bool global); + private: + friend int ndbcluster_drop_database_impl(const char *path); + friend int ndb_handle_schema_change(THD *thd, + Ndb *ndb, NdbEventOperation *pOp, + NDB_SHARE *share); + int alter_table_name(const char *to); - int drop_table(); - int create_index(const char *name, KEY *key_info, bool unique); + static int delete_table(ha_ndbcluster *h, Ndb *ndb, + const char *path, + const char *db, + const char *table_name); + int drop_ndb_table(); + int create_ndb_index(const char *name, KEY *key_info, bool unique); int create_ordered_index(const char *name, KEY *key_info); int create_unique_index(const char *name, KEY *key_info); + int create_index(const char *name, KEY *key_info, + NDB_INDEX_TYPE idx_type, uint idx_no); + int drop_ndb_index(const char *name); + int table_changed(const void *pack_frm_data, uint pack_frm_len); +// Index list management + int create_indexes(Ndb *ndb, TABLE *tab); + void clear_index(int i); + void clear_indexes(); + int open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error); + void renumber_indexes(Ndb *ndb, TABLE *tab); + int drop_indexes(Ndb *ndb, TABLE *tab); + int add_index_handle(THD *thd, NdbDictionary::Dictionary *dict, + KEY *key_info, const char *index_name, uint index_no); int initialize_autoincrement(const void *table); - enum ILBP {ILBP_CREATE = 0, ILBP_OPEN = 1}; // Index List Build Phase - int build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase); int get_metadata(const char* path); void release_metadata(); NDB_INDEX_TYPE get_index_type(uint idx_no) const; NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const; + NDB_INDEX_TYPE get_index_type_from_key(uint index_no, KEY *key_info) const; int check_index_fields_not_null(uint index_no); - int pk_read(const byte *key, uint key_len, byte *buf); - int complemented_pk_read(const byte *old_data, byte *new_data); - int peek_row(const byte *record); - int unique_index_read(const byte *key, uint key_len, - byte *buf); + uint set_up_partition_info(partition_info *part_info, + TABLE *table, + void *tab); + char* get_tablespace_name(THD *thd); + int set_range_data(void *tab, partition_info* part_info); + int set_list_data(void *tab, partition_info* part_info); + int complemented_read(const byte *old_data, byte *new_data, + uint32 old_part_id); + int pk_read(const byte *key, uint key_len, byte *buf, uint32 part_id); int ordered_index_scan(const key_range *start_key, const key_range *end_key, - bool sorted, bool descending, byte* buf); + bool sorted, bool descending, byte* 
buf, + part_id_range *part_spec); int full_table_scan(byte * buf); + + int peek_row(const byte *record); + int unique_index_read(const byte *key, uint key_len, + byte *buf); int fetch_next(NdbScanOperation* op); int next_result(byte *buf); int define_read_attrs(byte* buf, NdbOperation* op); @@ -621,13 +760,15 @@ private: uint fieldnr, const byte* field_ptr); int set_ndb_key(NdbOperation*, Field *field, uint fieldnr, const byte* field_ptr); - int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, bool *set_blob_value= 0); + int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, + int row_offset= 0, bool *set_blob_value= 0); int get_ndb_value(NdbOperation*, Field *field, uint fieldnr, byte*); friend int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg); int get_ndb_blobs_value(NdbBlob *last_ndb_blob); int set_primary_key(NdbOperation *op, const byte *key); int set_primary_key_from_record(NdbOperation *op, const byte *record); - int set_bounds(NdbIndexScanOperation*, const key_range *keys[2], uint= 0); + int set_bounds(NdbIndexScanOperation*, uint inx, bool rir, + const key_range *keys[2], uint= 0); int key_cmp(uint keynr, const byte * old_row, const byte * new_row); int set_index_key(NdbOperation *, const KEY *key_info, const byte *key_ptr); void print_results(); @@ -635,11 +776,11 @@ private: ulonglong get_auto_increment(); void invalidate_dictionary_cache(bool global); int ndb_err(NdbTransaction*); - bool uses_blob_value(bool all_fields); + bool uses_blob_value(); char *update_table_comment(const char * comment); - int write_ndb_file(); + int write_ndb_file(const char *name); int check_ndb_connection(THD* thd= current_thd); @@ -665,6 +806,7 @@ private: NdbScanOperation* op); friend int execute_commit(ha_ndbcluster*, NdbTransaction*); + friend int execute_no_commit_ignore_no_key(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit(ha_ndbcluster*, NdbTransaction*); friend int execute_no_commit_ie(ha_ndbcluster*, NdbTransaction*); @@ -681,14 +823,18 @@ private: NDB_SHARE *m_share; NDB_INDEX_DATA m_index[MAX_KEY]; // NdbRecAttr has no reference to blob - typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE]; byte m_ref[NDB_HIDDEN_PRIMARY_KEY_LENGTH]; + partition_info *m_part_info; + byte *m_rec0; + Field **m_part_field_array; + bool m_use_partition_function; + bool m_sorted; bool m_use_write; bool m_ignore_dup_key; bool m_primary_key_update; - bool m_retrieve_all_fields; - bool m_retrieve_primary_key; + bool m_write_op; + bool m_ignore_no_key; ha_rows m_rows_to_insert; ha_rows m_rows_inserted; ha_rows m_bulk_insert_rows; @@ -718,10 +864,7 @@ private: Ndb *get_ndb(); }; -extern struct show_var_st ndb_status_variables[]; - -bool ndbcluster_init(void); -bool ndbcluster_end(void); +extern SHOW_VAR ndb_status_variables[]; int ndbcluster_discover(THD* thd, const char* dbname, const char* name, const void** frmblob, uint* frmlen); @@ -729,8 +872,4 @@ int ndbcluster_find_files(THD *thd,const char *db,const char *path, const char *wild, bool dir, List<char> *files); int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name); -int ndbcluster_drop_database(const char* path); - void ndbcluster_print_error(int error, const NdbOperation *error_op); - -int ndbcluster_show_status(THD*); diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc new file mode 100644 index 00000000000..f88c118b46e --- /dev/null +++ b/sql/ha_ndbcluster_binlog.cc @@ -0,0 +1,3187 @@ +/* Copyright (C) 
2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "mysql_priv.h" +#include "ha_ndbcluster.h" + +#ifdef HAVE_NDB_BINLOG +#include "rpl_injector.h" +#include "rpl_filter.h" +#include "slave.h" +#include "ha_ndbcluster_binlog.h" + +#ifdef ndb_dynamite +#undef assert +#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0) +#endif + +/* + defines for cluster replication table names +*/ +#include "ha_ndbcluster_tables.h" +#define NDB_APPLY_TABLE_FILE "./" NDB_REP_DB "/" NDB_APPLY_TABLE +#define NDB_SCHEMA_TABLE_FILE "./" NDB_REP_DB "/" NDB_SCHEMA_TABLE + +/* + Flag showing if the ndb injector thread is running, if so == 1 + -1 if it was started but later stopped for some reason + 0 if never started +*/ +int ndb_binlog_thread_running= 0; +/* + Flag showing if the ndb binlog should be created, if so == TRUE + FALSE if not +*/ +my_bool ndb_binlog_running= FALSE; + +/* + Global reference to the ndb injector thread THD object + + Its sole purpose is to set the in_use table member variable + in get_share(...) +*/ +THD *injector_thd= 0; + +/* + Global reference to the ndb injector's Ndb object. + + Used mainly by the binlog index thread, but exposed to the client sql + thread for one reason: to set up the event operations for a table + so that the ndb injector thread can receive events. + + Must therefore always be used with a surrounding + pthread_mutex_lock(&injector_mutex) when doing create/dropEventOperation +*/ +static Ndb *injector_ndb= 0; +static Ndb *schema_ndb= 0; + +/* + Mutex and condition used for interacting between client sql thread + and injector thread +*/ +pthread_t ndb_binlog_thread; +pthread_mutex_t injector_mutex; +pthread_cond_t injector_cond; + +/* NDB Injector thread (used for binlog creation) */ +static ulonglong ndb_latest_applied_binlog_epoch= 0; +static ulonglong ndb_latest_handled_binlog_epoch= 0; +static ulonglong ndb_latest_received_binlog_epoch= 0; + +NDB_SHARE *apply_status_share= 0; +NDB_SHARE *schema_share= 0; + +/* instantiated in storage/ndb/src/ndbapi/Ndbif.cpp */ +extern Uint64 g_latest_trans_gci; + +/* + Global variables for holding the binlog_index table reference +*/ +static TABLE *binlog_index= 0; +static TABLE_LIST binlog_tables; + +/* + Helper functions +*/ + +#ifndef DBUG_OFF +static void print_records(TABLE *table, const char *record) +{ + for (uint j= 0; j < table->s->fields; j++) + { + char buf[40]; + int pos= 0; + Field *field= table->field[j]; + const byte* field_ptr= field->ptr - table->record[0] + record; + int pack_len= field->pack_length(); + int n= pack_len < 10 ?
pack_len : 10; + + for (int i= 0; i < n && pos < 20; i++) + { + pos+= sprintf(&buf[pos]," %x", (int) (unsigned char) field_ptr[i]); + } + buf[pos]= 0; + DBUG_PRINT("info",("[%u]field_ptr[0->%d]: %s", j, n, buf)); + } +} +#else +#define print_records(a,b) +#endif + + +#ifndef DBUG_OFF +static void dbug_print_table(const char *info, TABLE *table) +{ + if (table == 0) + { + DBUG_PRINT("info",("%s: (null)", info)); + return; + } + DBUG_PRINT("info", + ("%s: %s.%s s->fields: %d " + "reclength: %d rec_buff_length: %d record[0]: %lx " + "record[1]: %lx", + info, + table->s->db.str, + table->s->table_name.str, + table->s->fields, + table->s->reclength, + table->s->rec_buff_length, + table->record[0], + table->record[1])); + + for (unsigned int i= 0; i < table->s->fields; i++) + { + Field *f= table->field[i]; + DBUG_PRINT("info", + ("[%d] \"%s\"(0x%lx:%s%s%s%s%s%s) type: %d pack_length: %d " + "ptr: 0x%lx[+%d] null_bit: %u null_ptr: 0x%lx[+%d]", + i, + f->field_name, + f->flags, + (f->flags & PRI_KEY_FLAG) ? "pri" : "attr", + (f->flags & NOT_NULL_FLAG) ? "" : ",nullable", + (f->flags & UNSIGNED_FLAG) ? ",unsigned" : ",signed", + (f->flags & ZEROFILL_FLAG) ? ",zerofill" : "", + (f->flags & BLOB_FLAG) ? ",blob" : "", + (f->flags & BINARY_FLAG) ? ",binary" : "", + f->real_type(), + f->pack_length(), + f->ptr, f->ptr - table->record[0], + f->null_bit, + f->null_ptr, (byte*) f->null_ptr - table->record[0])); + if (f->type() == MYSQL_TYPE_BIT) + { + Field_bit *g= (Field_bit*) f; + DBUG_PRINT("MYSQL_TYPE_BIT",("field_length: %d bit_ptr: 0x%lx[+%d] " + "bit_ofs: %u bit_len: %u", + g->field_length, g->bit_ptr, + (byte*) g->bit_ptr-table->record[0], + g->bit_ofs, g->bit_len)); + } + } +} +#else +#define dbug_print_table(a,b) +#endif + + +/* + Run a query through mysql_parse + + Used to: + - purge the binlog_index + - create the apply_status table +*/ +static void run_query(THD *thd, char *buf, char *end, + my_bool print_error, my_bool disable_binlog) +{ + ulong save_query_length= thd->query_length; + char *save_query= thd->query; + ulong save_thread_id= thd->variables.pseudo_thread_id; + ulonglong save_thd_options= thd->options; + DBUG_ASSERT(sizeof(save_thd_options) == sizeof(thd->options)); + NET save_net= thd->net; + + bzero((char*) &thd->net, sizeof(NET)); + thd->query_length= end - buf; + thd->query= buf; + thd->variables.pseudo_thread_id= thread_id; + if (disable_binlog) + thd->options&= ~OPTION_BIN_LOG; + + DBUG_PRINT("query", ("%s", thd->query)); + mysql_parse(thd, thd->query, thd->query_length); + + if (print_error && thd->query_error) + { + sql_print_error("NDB: %s: error %s %d %d %d", + buf, thd->net.last_error, thd->net.last_errno, + thd->net.report_error, thd->query_error); + } + + thd->options= save_thd_options; + thd->query_length= save_query_length; + thd->query= save_query; + thd->variables.pseudo_thread_id= save_thread_id; + thd->net= save_net; + + if (thd == injector_thd) + { + /* + running the query will close all tables, including the binlog_index + used in injector_thd + */ + binlog_index= 0; + } +} + +int +ndbcluster_binlog_open_table(THD *thd, NDB_SHARE *share, + TABLE_SHARE *table_share, TABLE *table) +{ + int error; + MEM_ROOT *mem_root= &share->mem_root; + DBUG_ENTER("ndbcluster_binlog_open_table"); + + init_tmp_table_share(table_share, share->db, 0, share->table_name, + share->key); + if ((error= open_table_def(thd, table_share, 0))) + { + sql_print_error("Unable to get table share for %s, error=%d", + share->key, error); + DBUG_PRINT("error", ("open_table_def failed %d",
error)); + free_table_share(table_share); + my_free((gptr) table_share, MYF(0)); + my_free((gptr) table, MYF(0)); + DBUG_RETURN(error); + } + if ((error= open_table_from_share(thd, table_share, "", 0, + (uint) READ_ALL, 0, table, FALSE))) + { + sql_print_error("Unable to open table for %s, error=%d(%d)", + share->key, error, my_errno); + DBUG_PRINT("error", ("open_table_from_share failed %d", error)); + free_table_share(table_share); + my_free((gptr) table_share, MYF(0)); + my_free((gptr) table, MYF(0)); + DBUG_RETURN(error); + } + assign_new_table_id(table_share); + if (!table->record[1] || table->record[1] == table->record[0]) + { + table->record[1]= alloc_root(&table->mem_root, + table->s->rec_buff_length); + } + table->in_use= injector_thd; + + table->s->db.str= share->db; + table->s->db.length= strlen(share->db); + table->s->table_name.str= share->table_name; + table->s->table_name.length= strlen(share->table_name); + + share->table_share= table_share; + share->table= table; +#ifndef DBUG_OFF + dbug_print_table("table", table); +#endif + /* + ! do not touch the contents of the table; + it may be in use by the injector thread + */ + share->ndb_value[0]= (NdbValue*) + alloc_root(mem_root, sizeof(NdbValue) * + (table->s->fields + 2 /*extra for hidden key and part key*/)); + share->ndb_value[1]= (NdbValue*) + alloc_root(mem_root, sizeof(NdbValue) * + (table->s->fields + 2 /*extra for hidden key and part key*/)); + + DBUG_RETURN(0); +} + + +/* + Initialize the binlog part of the NDB_SHARE +*/ +void ndbcluster_binlog_init_share(NDB_SHARE *share, TABLE *_table) +{ + THD *thd= current_thd; + MEM_ROOT *mem_root= &share->mem_root; + int do_event_op= ndb_binlog_running; + DBUG_ENTER("ndbcluster_binlog_init_share"); + + share->op= 0; + share->table= 0; + + if (!schema_share && + strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) + do_event_op= 1; + + { + int i, no_nodes= g_ndb_cluster_connection->no_db_nodes(); + share->subscriber_bitmap= (MY_BITMAP*) + alloc_root(mem_root, no_nodes * sizeof(MY_BITMAP)); + for (i= 0; i < no_nodes; i++) + { + bitmap_init(&share->subscriber_bitmap[i], + (Uint32*)alloc_root(mem_root, max_ndb_nodes/8), + max_ndb_nodes, false); + bitmap_clear_all(&share->subscriber_bitmap[i]); + } + bitmap_init(&share->slock_bitmap, share->slock, + sizeof(share->slock)*8, false); + bitmap_clear_all(&share->slock_bitmap); + } + + if (!do_event_op) + { + if (_table) + { + if (_table->s->primary_key == MAX_KEY) + share->flags|= NSF_HIDDEN_PK; + if (_table->s->blob_fields != 0) + share->flags|= NSF_BLOB_FLAG; + } + else + { + share->flags|= NSF_NO_BINLOG; + } + DBUG_VOID_RETURN; + } + while (1) + { + int error; + TABLE_SHARE *table_share= + (TABLE_SHARE *) my_malloc(sizeof(*table_share), MYF(MY_WME)); + TABLE *table= (TABLE*) my_malloc(sizeof(*table), MYF(MY_WME)); + if ((error= ndbcluster_binlog_open_table(thd, share, table_share, table))) + break; + if (table->s->primary_key == MAX_KEY) + share->flags|= NSF_HIDDEN_PK; + if (table->s->blob_fields != 0) + share->flags|= NSF_BLOB_FLAG; + break; + } + DBUG_VOID_RETURN; +} + +/***************************************************************** + functions called from master sql client threads +****************************************************************/ + +/* + Called in mysql_show_binlog_events and reset_logs to make sure we wait for + all events originating from this mysql server to arrive in the binlog. + + Wait for the last epoch that the last transaction is part of.
+ + Wait a maximum of 30 seconds. +*/ +static void ndbcluster_binlog_wait(THD *thd) +{ + if (ndb_binlog_running) + { + DBUG_ENTER("ndbcluster_binlog_wait"); + const char *save_info= thd ? thd->proc_info : 0; + ulonglong wait_epoch= g_latest_trans_gci; + int count= 30; + if (thd) + thd->proc_info= "Waiting for ndbcluster binlog update to " + "reach current position"; + while (count && ndb_binlog_running && + ndb_latest_handled_binlog_epoch < wait_epoch) + { + count--; + sleep(1); + } + if (thd) + thd->proc_info= save_info; + DBUG_VOID_RETURN; + } +} + +/* + Called from MYSQL_LOG::reset_logs in log.cc when binlog is emptied +*/ +static int ndbcluster_reset_logs(THD *thd) +{ + if (!ndb_binlog_running) + return 0; + + DBUG_ENTER("ndbcluster_reset_logs"); + + /* + Wait for all events originating from this mysql server to have + reached the binlog before continuing to reset + */ + ndbcluster_binlog_wait(thd); + + char buf[1024]; + char *end= strmov(buf, "DELETE FROM " NDB_REP_DB "." NDB_REP_TABLE); + + run_query(thd, buf, end, FALSE, TRUE); + + DBUG_RETURN(0); +} + +/* + Called from MYSQL_LOG::purge_logs in log.cc when the binlog "file" + is removed +*/ + +static int +ndbcluster_binlog_index_purge_file(THD *thd, const char *file) +{ + if (!ndb_binlog_running) + return 0; + + DBUG_ENTER("ndbcluster_binlog_index_purge_file"); + DBUG_PRINT("enter", ("file: %s", file)); + + char buf[1024]; + char *end= strmov(strmov(strmov(buf, + "DELETE FROM " + NDB_REP_DB "." NDB_REP_TABLE + " WHERE File='"), file), "'"); + + run_query(thd, buf, end, FALSE, TRUE); + + DBUG_RETURN(0); +} + +static void +ndbcluster_binlog_log_query(THD *thd, enum_binlog_command binlog_command, + const char *query, uint query_length, + const char *db, const char *table_name) +{ + DBUG_ENTER("ndbcluster_binlog_log_query"); + DBUG_PRINT("enter", ("db: %s table_name: %s query: %s", + db, table_name, query)); + enum SCHEMA_OP_TYPE type; + int log= 0; + switch (binlog_command) + { + case LOGCOM_CREATE_TABLE: + type= SOT_CREATE_TABLE; + break; + case LOGCOM_ALTER_TABLE: + type= SOT_ALTER_TABLE; + log= 1; + break; + case LOGCOM_RENAME_TABLE: + type= SOT_RENAME_TABLE; + break; + case LOGCOM_DROP_TABLE: + type= SOT_DROP_TABLE; + break; + case LOGCOM_CREATE_DB: + type= SOT_CREATE_DB; + log= 1; + break; + case LOGCOM_ALTER_DB: + type= SOT_ALTER_DB; + log= 1; + break; + case LOGCOM_DROP_DB: + type= SOT_DROP_DB; + break; + } + if (log) + { + ndbcluster_log_schema_op(thd, 0, query, query_length, + db, table_name, 0, 0, type); + } + DBUG_VOID_RETURN; +} + +/* + End use of the NDB Cluster table handler + - free all global variables allocated by + ndbcluster_init() +*/ + +static int ndbcluster_binlog_end(THD *thd) +{ + DBUG_ENTER("ndb_binlog_end"); + + if (!ndbcluster_util_inited) + DBUG_RETURN(0); + + // Kill ndb utility thread + (void) pthread_mutex_lock(&LOCK_ndb_util_thread); + DBUG_PRINT("exit",("killing ndb util thread: %lx", ndb_util_thread)); + (void) pthread_cond_signal(&COND_ndb_util_thread); + (void) pthread_mutex_unlock(&LOCK_ndb_util_thread); + +#ifdef HAVE_NDB_BINLOG + /* wait for injector thread to finish */ + if (ndb_binlog_thread_running > 0) + { + pthread_mutex_lock(&injector_mutex); + while (ndb_binlog_thread_running > 0) + { + struct timespec abstime; + set_timespec(abstime, 1); + pthread_cond_timedwait(&injector_cond, &injector_mutex, &abstime); + } + pthread_mutex_unlock(&injector_mutex); + } + + /* remove all shares */ + { + pthread_mutex_lock(&ndbcluster_mutex); + for (uint i= 0; i < ndbcluster_open_tables.records; i++) + {
NDB_SHARE *share= + (NDB_SHARE*) hash_element(&ndbcluster_open_tables, i); + if (share->table) + DBUG_PRINT("share", + ("table->s->db.table_name: %s.%s", + share->table->s->db.str, share->table->s->table_name.str)); + if (share->state != NSS_DROPPED && !--share->use_count) + real_free_share(&share); + else + { + DBUG_PRINT("share", + ("[%d] 0x%lx key: %s key_length: %d", + i, share, share->key, share->key_length)); + DBUG_PRINT("share", + ("db.tablename: %s.%s use_count: %d commit_count: %d", + share->db, share->table_name, + share->use_count, share->commit_count)); + } + } + pthread_mutex_unlock(&ndbcluster_mutex); + } +#endif + ndbcluster_util_inited= 0; + DBUG_RETURN(0); +} + +/***************************************************************** + functions called from slave sql client threads +****************************************************************/ +static void ndbcluster_reset_slave(THD *thd) +{ + if (!ndb_binlog_running) + return; + + DBUG_ENTER("ndbcluster_reset_slave"); + char buf[1024]; + char *end= strmov(buf, "DELETE FROM " NDB_REP_DB "." NDB_APPLY_TABLE); + run_query(thd, buf, end, FALSE, TRUE); + DBUG_VOID_RETURN; +} + +/* + Initialize the binlog part of the ndb handlerton +*/ +static int ndbcluster_binlog_func(THD *thd, enum_binlog_func fn, void *arg) +{ + switch(fn) + { + case BFN_RESET_LOGS: + ndbcluster_reset_logs(thd); + break; + case BFN_RESET_SLAVE: + ndbcluster_reset_slave(thd); + break; + case BFN_BINLOG_WAIT: + ndbcluster_binlog_wait(thd); + break; + case BFN_BINLOG_END: + ndbcluster_binlog_end(thd); + break; + case BFN_BINLOG_PURGE_FILE: + ndbcluster_binlog_index_purge_file(thd, (const char *)arg); + break; + } + return 0; +} + +void ndbcluster_binlog_init_handlerton() +{ + handlerton &h= ndbcluster_hton; + h.binlog_func= ndbcluster_binlog_func; + h.binlog_log_query= ndbcluster_binlog_log_query; +} + + + + + +/* + check the availability of the apply_status share + - return share, but do not increase refcount + - return 0 if there is no share +*/ +static NDB_SHARE *ndbcluster_check_apply_status_share() +{ + pthread_mutex_lock(&ndbcluster_mutex); + + void *share= hash_search(&ndbcluster_open_tables, + NDB_APPLY_TABLE_FILE, + sizeof(NDB_APPLY_TABLE_FILE) - 1); + DBUG_PRINT("info",("ndbcluster_check_apply_status_share %s %p", + NDB_APPLY_TABLE_FILE, share)); + pthread_mutex_unlock(&ndbcluster_mutex); + return (NDB_SHARE*) share; +} + +/* + check the availability of the schema share + - return share, but do not increase refcount + - return 0 if there is no share +*/ +static NDB_SHARE *ndbcluster_check_schema_share() +{ + pthread_mutex_lock(&ndbcluster_mutex); + + void *share= hash_search(&ndbcluster_open_tables, + NDB_SCHEMA_TABLE_FILE, + sizeof(NDB_SCHEMA_TABLE_FILE) - 1); + DBUG_PRINT("info",("ndbcluster_check_schema_share %s %p", + NDB_SCHEMA_TABLE_FILE, share)); + pthread_mutex_unlock(&ndbcluster_mutex); + return (NDB_SHARE*) share; +} + +/* + Create the apply_status table +*/ +static int ndbcluster_create_apply_status_table(THD *thd) +{ + DBUG_ENTER("ndbcluster_create_apply_status_table"); + + /* + Check if we already have the apply status table. + If so it should have been discovered at startup + and thus have a share + */ + + if (ndbcluster_check_apply_status_share()) + DBUG_RETURN(0); + + if (g_ndb_cluster_connection->get_no_ready() <= 0) + DBUG_RETURN(0); + + char buf[1024], *end; + + if (ndb_extra_logging) + sql_print_information("NDB: Creating " NDB_REP_DB "."
NDB_APPLY_TABLE); + + /* + Check if apply status table exists in MySQL "dictionary" + if so, remove it since there is none in Ndb + */ + { + build_table_filename(buf, sizeof(buf), + NDB_REP_DB, NDB_APPLY_TABLE, reg_ext); + my_delete(buf, MYF(0)); + } + + /* + Note, updating this table schema must be reflected in ndb_restore + */ + end= strmov(buf, "CREATE TABLE IF NOT EXISTS " + NDB_REP_DB "." NDB_APPLY_TABLE + " ( server_id INT UNSIGNED NOT NULL," + " epoch BIGINT UNSIGNED NOT NULL, " + " PRIMARY KEY USING HASH (server_id) ) ENGINE=NDB"); + + run_query(thd, buf, end, TRUE, TRUE); + + DBUG_RETURN(0); +} + + +/* + Create the schema table +*/ +static int ndbcluster_create_schema_table(THD *thd) +{ + DBUG_ENTER("ndbcluster_create_schema_table"); + + /* + Check if we already have the schema table. + If so it should have been discovered at startup + and thus have a share + */ + + if (ndbcluster_check_schema_share()) + DBUG_RETURN(0); + + if (g_ndb_cluster_connection->get_no_ready() <= 0) + DBUG_RETURN(0); + + char buf[1024], *end; + + if (ndb_extra_logging) + sql_print_information("NDB: Creating " NDB_REP_DB "." NDB_SCHEMA_TABLE); + + /* + Check if schema table exists in MySQL "dictionary" + if so, remove it since there is none in Ndb + */ + { + build_table_filename(buf, sizeof(buf), + NDB_REP_DB, NDB_SCHEMA_TABLE, reg_ext); + my_delete(buf, MYF(0)); + } + + /* + Update the defines below to reflect the table schema + */ + end= strmov(buf, "CREATE TABLE IF NOT EXISTS " + NDB_REP_DB "." NDB_SCHEMA_TABLE + " ( db VARCHAR(63) NOT NULL," + " name VARCHAR(63) NOT NULL," + " slock BINARY(32) NOT NULL," + " query VARCHAR(4094) NOT NULL," + " node_id INT UNSIGNED NOT NULL," + " epoch BIGINT UNSIGNED NOT NULL," + " id INT UNSIGNED NOT NULL," + " version INT UNSIGNED NOT NULL," + " type INT UNSIGNED NOT NULL," + " PRIMARY KEY USING HASH (db,name) ) ENGINE=NDB"); + + run_query(thd, buf, end, TRUE, TRUE); + + DBUG_RETURN(0); +} + +void ndbcluster_setup_binlog_table_shares(THD *thd) +{ + int done_find_all_files= 0; + if (!apply_status_share && + ndbcluster_check_apply_status_share() == 0) + { + if (!done_find_all_files) + { + ndbcluster_find_all_files(thd); + done_find_all_files= 1; + } + ndbcluster_create_apply_status_table(thd); + } + if (!schema_share && + ndbcluster_check_schema_share() == 0) + { + if (!done_find_all_files) + { + ndbcluster_find_all_files(thd); + done_find_all_files= 1; + } + ndbcluster_create_schema_table(thd); + } +} + +/* + Defines and struct for schema table. + Should reflect table definition above. 
+*/ +#define SCHEMA_DB_I 0u +#define SCHEMA_NAME_I 1u +#define SCHEMA_SLOCK_I 2u +#define SCHEMA_QUERY_I 3u +#define SCHEMA_NODE_ID_I 4u +#define SCHEMA_EPOCH_I 5u +#define SCHEMA_ID_I 6u +#define SCHEMA_VERSION_I 7u +#define SCHEMA_TYPE_I 8u +#define SCHEMA_SIZE 9u +#define SCHEMA_SLOCK_SIZE 32u +#define SCHEMA_QUERY_SIZE 4096u + +struct Cluster_replication_schema +{ + unsigned char db_length; + char db[64]; + unsigned char name_length; + char name[64]; + unsigned char slock_length; + uint32 slock[SCHEMA_SLOCK_SIZE/4]; + unsigned short query_length; + char query[SCHEMA_QUERY_SIZE]; + Uint64 epoch; + uint32 node_id; + uint32 id; + uint32 version; + uint32 type; +}; + +/* + Transfer schema table data into corresponding struct +*/ +static void ndbcluster_get_schema(TABLE *table, + Cluster_replication_schema *s) +{ + Field **field; + /* db varchar 1 length byte */ + field= table->field; + s->db_length= *(uint8*)(*field)->ptr; + DBUG_ASSERT(s->db_length <= (*field)->field_length); + DBUG_ASSERT((*field)->field_length + 1 == sizeof(s->db)); + memcpy(s->db, (*field)->ptr + 1, s->db_length); + s->db[s->db_length]= 0; + /* name varchar 1 length byte */ + field++; + s->name_length= *(uint8*)(*field)->ptr; + DBUG_ASSERT(s->name_length <= (*field)->field_length); + DBUG_ASSERT((*field)->field_length + 1 == sizeof(s->name)); + memcpy(s->name, (*field)->ptr + 1, s->name_length); + s->name[s->name_length]= 0; + /* slock fixed length */ + field++; + s->slock_length= (*field)->field_length; + DBUG_ASSERT((*field)->field_length == sizeof(s->slock)); + memcpy(s->slock, (*field)->ptr, s->slock_length); + /* query varchar 2 length bytes */ + field++; + s->query_length= uint2korr((*field)->ptr); + DBUG_ASSERT(s->query_length <= (*field)->field_length); + DBUG_ASSERT((*field)->field_length + 2 == sizeof(s->query)); + memcpy(s->query, (*field)->ptr + 2, s->query_length); + s->query[s->query_length]= 0; + /* node_id */ + field++; + s->node_id= ((Field_long *)*field)->val_int(); + /* epoch */ + field++; + s->epoch= ((Field_long *)*field)->val_int(); + /* id */ + field++; + s->id= ((Field_long *)*field)->val_int(); + /* version */ + field++; + s->version= ((Field_long *)*field)->val_int(); + /* type */ + field++; + s->type= ((Field_long *)*field)->val_int(); +} + +/* + helper function to pack a ndb varchar +*/ +static char *ndb_pack_varchar(const NDBCOL *col, char *buf, + const char *str, int sz) +{ + switch (col->getArrayType()) + { + case NDBCOL::ArrayTypeFixed: + memcpy(buf, str, sz); + break; + case NDBCOL::ArrayTypeShortVar: + *(unsigned char*)buf= (unsigned char)sz; + memcpy(buf + 1, str, sz); + break; + case NDBCOL::ArrayTypeMediumVar: + int2store(buf, sz); + memcpy(buf + 2, str, sz); + break; + } + return buf; +} + +/* + log query in schema table +*/ +int ndbcluster_log_schema_op(THD *thd, NDB_SHARE *share, + const char *query, int query_length, + const char *db, const char *table_name, + uint32 ndb_table_id, + uint32 ndb_table_version, + enum SCHEMA_OP_TYPE type, + const char *old_db, const char *old_table_name) +{ + DBUG_ENTER("ndbcluster_log_schema_op"); + Thd_ndb *thd_ndb= get_thd_ndb(thd); + if (!thd_ndb) + { + if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) + { + sql_print_error("Could not allocate Thd_ndb object"); + DBUG_RETURN(1); + } + set_thd_ndb(thd, thd_ndb); + } + + DBUG_PRINT("enter", + ("query: %s db: %s table_name: %s thd_ndb->options: %d", + query, db, table_name, thd_ndb->options)); + if (!schema_share || thd_ndb->options & TNO_NO_LOG_SCHEMA_OP) + { + DBUG_RETURN(0); + } + + char 
tmp_buf2[FN_REFLEN]; + int get_a_share= 0; + switch (type) + { + case SOT_DROP_TABLE: + /* drop database command, do not log at drop table */ + if (thd->lex->sql_command == SQLCOM_DROP_DB) + DBUG_RETURN(0); + /* redo the drop table query as it may contain several tables */ + query= tmp_buf2; + query_length= (uint) (strxmov(tmp_buf2, "drop table `", + table_name, "`", NullS) - tmp_buf2); + if (!share) + get_a_share= 1; + break; + case SOT_RENAME_TABLE: + /* redo the rename table query as it may contain several tables */ + query= tmp_buf2; + query_length= (uint) (strxmov(tmp_buf2, "rename table `", + old_db, ".", old_table_name, "` to `", + db, ".", table_name, "`", NullS) - tmp_buf2); + if (!share) + get_a_share= 1; + break; + case SOT_CREATE_TABLE: + // fall through + case SOT_ALTER_TABLE: + if (!share) + get_a_share= 1; + break; + case SOT_DROP_DB: + break; + case SOT_CREATE_DB: + break; + case SOT_ALTER_DB: + break; + case SOT_TABLESPACE: + break; + case SOT_LOGFILE_GROUP: + break; + default: + abort(); /* should not happen, programming error */ + } + + if (get_a_share) + { + char key[FN_REFLEN]; + build_table_filename(key, sizeof(key), db, table_name, ""); + share= get_share(key, 0, false, false); + } + + const NdbError *ndb_error= 0; + uint32 node_id= g_ndb_cluster_connection->node_id(); + Uint64 epoch= 0; + MY_BITMAP schema_subscribers; + uint32 bitbuf[sizeof(schema_share->slock)/4]; + { + int i; + bitmap_init(&schema_subscribers, bitbuf, sizeof(bitbuf)*8, false); + bitmap_set_all(&schema_subscribers); + (void) pthread_mutex_lock(&schema_share->mutex); + for (i= 0; i < ndb_number_of_storage_nodes; i++) + { + MY_BITMAP *table_subscribers= &schema_share->subscriber_bitmap[i]; + if (!bitmap_is_clear_all(table_subscribers)) + bitmap_intersect(&schema_subscribers, + table_subscribers); + } + (void) pthread_mutex_unlock(&schema_share->mutex); + bitmap_clear_bit(&schema_subscribers, node_id); + + if (share) + { + (void) pthread_mutex_lock(&share->mutex); + memcpy(share->slock, schema_subscribers.bitmap, sizeof(share->slock)); + (void) pthread_mutex_unlock(&share->mutex); + } + + DBUG_DUMP("schema_subscribers", (char*)schema_subscribers.bitmap, + no_bytes_in_map(&schema_subscribers)); + DBUG_PRINT("info", ("bitmap_is_clear_all(&schema_subscribers): %d", + bitmap_is_clear_all(&schema_subscribers))); + } + + Ndb *ndb= thd_ndb->ndb; + char save_db[FN_REFLEN]; + strcpy(save_db, ndb->getDatabaseName()); + + char tmp_buf[SCHEMA_QUERY_SIZE]; + NDBDICT *dict= ndb->getDictionary(); + ndb->setDatabaseName(NDB_REP_DB); + const NDBTAB *ndbtab= dict->getTable(NDB_SCHEMA_TABLE); + NdbTransaction *trans= 0; + int retries= 100; + const NDBCOL *col[SCHEMA_SIZE]; + unsigned sz[SCHEMA_SIZE]; + + if (ndbtab == 0) + { + if (strcmp(NDB_REP_DB, db) != 0 || + strcmp(NDB_SCHEMA_TABLE, table_name)) + { + ndb_error= &dict->getNdbError(); + } + goto end; + } + + { + uint i; + for (i= 0; i < SCHEMA_SIZE; i++) + { + col[i]= ndbtab->getColumn(i); + sz[i]= col[i]->getLength(); + DBUG_ASSERT(sz[i] <= sizeof(tmp_buf)); + } + } + + while (1) + { + if ((trans= ndb->startTransaction()) == 0) + goto err; + { + NdbOperation *op= 0; + int r= 0; + r|= (op= trans->getNdbOperation(ndbtab)) == 0; + DBUG_ASSERT(r == 0); + r|= op->writeTuple(); + DBUG_ASSERT(r == 0); + + /* db */ + ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, db, strlen(db)); + r|= op->equal(SCHEMA_DB_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* name */ + ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, table_name, + strlen(table_name)); + r|= op->equal(SCHEMA_NAME_I, tmp_buf);
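
The write loop here (and ndbcluster_update_slock below) follows the usual NDB API retry discipline: close the transaction and retry only while the error status is NdbError::TemporaryError and a bounded retry budget remains. Condensed into a sketch against the public NDB API (NdbApi.hpp); the operation setup is elided:

#include <NdbApi.hpp>

// Sketch only: retry a transaction on temporary NDB errors,
// as the err: label above does with retries initialised to 100.
static int execute_with_retry(Ndb *ndb, int retries)
{
  while (true)
  {
    NdbTransaction *trans= ndb->startTransaction();
    if (trans == 0)
      return -1;
    // ... getNdbOperation()/writeTuple()/equal()/setValue() here ...
    if (trans->execute(NdbTransaction::Commit) == 0)
    {
      ndb->closeTransaction(trans);
      return 0;                        // committed
    }
    bool temporary=
      trans->getNdbError().status == NdbError::TemporaryError;
    ndb->closeTransaction(trans);
    if (temporary && retries-- > 0)
      continue;                        // transient: try again
    return -1;                         // permanent failure
  }
}
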
+ DBUG_ASSERT(r == 0); + /* slock */ + DBUG_ASSERT(sz[SCHEMA_SLOCK_I] == sizeof(bitbuf)); + r|= op->setValue(SCHEMA_SLOCK_I, (char*)schema_subscribers.bitmap); + DBUG_ASSERT(r == 0); + /* query */ + ndb_pack_varchar(col[SCHEMA_QUERY_I], tmp_buf, query, query_length); + r|= op->setValue(SCHEMA_QUERY_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* node_id */ + r|= op->setValue(SCHEMA_NODE_ID_I, node_id); + DBUG_ASSERT(r == 0); + /* epoch */ + r|= op->setValue(SCHEMA_EPOCH_I, epoch); + DBUG_ASSERT(r == 0); + /* id */ + r|= op->setValue(SCHEMA_ID_I, ndb_table_id); + DBUG_ASSERT(r == 0); + /* version */ + r|= op->setValue(SCHEMA_VERSION_I, ndb_table_version); + DBUG_ASSERT(r == 0); + /* type */ + r|= op->setValue(SCHEMA_TYPE_I, (uint32)type); + DBUG_ASSERT(r == 0); + } + if (trans->execute(NdbTransaction::Commit) == 0) + { + dict->forceGCPWait(); + DBUG_PRINT("info", ("logged: %s", query)); + break; + } +err: + const NdbError *this_error= trans ? + &trans->getNdbError() : &ndb->getNdbError(); + if (this_error->status == NdbError::TemporaryError) + { + if (retries--) + { + if (trans) + ndb->closeTransaction(trans); + continue; // retry + } + } + ndb_error= this_error; + break; + } +end: + if (ndb_error) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndb_error->code, + ndb_error->message, + "Could not log query '%s' on other mysqld's"); + + if (trans) + ndb->closeTransaction(trans); + ndb->setDatabaseName(save_db); + + /* + Wait for other mysqld's to acknowledge the table operation + */ + if (ndb_error == 0 && + (type == SOT_CREATE_TABLE || + type == SOT_RENAME_TABLE || + type == SOT_ALTER_TABLE) && + !bitmap_is_clear_all(&schema_subscribers)) + { + int max_timeout= 10; + (void) pthread_mutex_lock(&share->mutex); + while (1) + { + struct timespec abstime; + int i; + set_timespec(abstime, 1); + (void) pthread_cond_timedwait(&injector_cond, + &share->mutex, + &abstime); + + (void) pthread_mutex_lock(&schema_share->mutex); + for (i= 0; i < ndb_number_of_storage_nodes; i++) + { + /* remove any unsubscribed from schema_subscribers */ + MY_BITMAP *tmp= &schema_share->subscriber_bitmap[i]; + if (!bitmap_is_clear_all(tmp)) + bitmap_intersect(&schema_subscribers, tmp); + } + (void) pthread_mutex_unlock(&schema_share->mutex); + + /* remove any unsubscribed from share->slock */ + bitmap_intersect(&share->slock_bitmap, &schema_subscribers); + + DBUG_DUMP("share->slock_bitmap.bitmap", (char*)share->slock_bitmap.bitmap, + no_bytes_in_map(&share->slock_bitmap)); + + if (bitmap_is_clear_all(&share->slock_bitmap)) + break; + + max_timeout--; + if (max_timeout == 0) + { + sql_print_error("NDB create table: timed out. 
Ignoring..."); + break; + } + if (ndb_extra_logging) + sql_print_information("NDB create table: " + "waiting max %u sec for create table %s.", + max_timeout, share->key); + } + (void) pthread_mutex_unlock(&share->mutex); + } + + if (get_a_share) + free_share(&share); + + DBUG_RETURN(0); +} + +/* + acknowledge handling of schema operation +*/ +static int +ndbcluster_update_slock(THD *thd, + const char *db, + const char *table_name) +{ + DBUG_ENTER("ndbcluster_update_slock"); + if (!schema_share) + { + DBUG_RETURN(0); + } + + const NdbError *ndb_error= 0; + uint32 node_id= g_ndb_cluster_connection->node_id(); + Ndb *ndb= check_ndb_in_thd(thd); + char save_db[FN_HEADLEN]; + strcpy(save_db, ndb->getDatabaseName()); + + char tmp_buf[SCHEMA_QUERY_SIZE]; + NDBDICT *dict= ndb->getDictionary(); + ndb->setDatabaseName(NDB_REP_DB); + const NDBTAB *ndbtab= dict->getTable(NDB_SCHEMA_TABLE); + NdbTransaction *trans= 0; + int retries= 100; + const NDBCOL *col[SCHEMA_SIZE]; + unsigned sz[SCHEMA_SIZE]; + + MY_BITMAP slock; + uint32 bitbuf[SCHEMA_SLOCK_SIZE/4]; + bitmap_init(&slock, bitbuf, sizeof(bitbuf)*8, false); + + if (ndbtab == 0) + { + abort(); + DBUG_RETURN(0); + } + + { + uint i; + for (i= 0; i < SCHEMA_SIZE; i++) + { + col[i]= ndbtab->getColumn(i); + sz[i]= col[i]->getLength(); + DBUG_ASSERT(sz[i] <= sizeof(tmp_buf)); + } + } + + while (1) + { + if ((trans= ndb->startTransaction()) == 0) + goto err; + { + NdbOperation *op= 0; + int r= 0; + + /* read the bitmap exlusive */ + r|= (op= trans->getNdbOperation(ndbtab)) == 0; + DBUG_ASSERT(r == 0); + r|= op->readTupleExclusive(); + DBUG_ASSERT(r == 0); + + /* db */ + ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, db, strlen(db)); + r|= op->equal(SCHEMA_DB_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* name */ + ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, table_name, + strlen(table_name)); + r|= op->equal(SCHEMA_NAME_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* slock */ + r|= op->getValue(SCHEMA_SLOCK_I, (char*)slock.bitmap) == 0; + DBUG_ASSERT(r == 0); + } + if (trans->execute(NdbTransaction::NoCommit)) + goto err; + bitmap_clear_bit(&slock, node_id); + { + NdbOperation *op= 0; + int r= 0; + + /* now update the tuple */ + r|= (op= trans->getNdbOperation(ndbtab)) == 0; + DBUG_ASSERT(r == 0); + r|= op->updateTuple(); + DBUG_ASSERT(r == 0); + + /* db */ + ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, db, strlen(db)); + r|= op->equal(SCHEMA_DB_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* name */ + ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, table_name, + strlen(table_name)); + r|= op->equal(SCHEMA_NAME_I, tmp_buf); + DBUG_ASSERT(r == 0); + /* slock */ + r|= op->setValue(SCHEMA_SLOCK_I, (char*)slock.bitmap); + DBUG_ASSERT(r == 0); + /* node_id */ + r|= op->setValue(SCHEMA_NODE_ID_I, node_id); + DBUG_ASSERT(r == 0); + /* type */ + r|= op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK); + DBUG_ASSERT(r == 0); + } + if (trans->execute(NdbTransaction::Commit) == 0) + { + dict->forceGCPWait(); + DBUG_PRINT("info", ("node %d cleared lock on '%s.%s'", + node_id, db, table_name)); + break; + } + err: + const NdbError *this_error= trans ? 
+ &trans->getNdbError() : &ndb->getNdbError(); + if (this_error->status == NdbError::TemporaryError) + { + if (retries--) + { + if (trans) + ndb->closeTransaction(trans); + continue; // retry + } + } + ndb_error= this_error; + break; + } +end: + if (ndb_error) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndb_error->code, + ndb_error->message, + "Could not release lock on '%s.%s'", + db, table_name); + if (trans) + ndb->closeTransaction(trans); + ndb->setDatabaseName(save_db); + DBUG_RETURN(0); +} + +/* + Handle _non_ data events from the storage nodes +*/ +int +ndb_handle_schema_change(THD *thd, Ndb *ndb, NdbEventOperation *pOp, + NDB_SHARE *share) +{ + DBUG_ENTER("ndb_handle_schema_change"); + int remote_drop_table= 0, do_close_cached_tables= 0; + const char *dbname= share->table->s->db.str; + const char *tabname= share->table->s->table_name.str; + bool online_alter_table= (pOp->getEventType() == NDBEVENT::TE_ALTER && + pOp->tableFrmChanged()); + + if (pOp->getEventType() != NDBEVENT::TE_CLUSTER_FAILURE && + (uint) pOp->getReqNodeId() != g_ndb_cluster_connection->node_id()) + { + TABLE_SHARE *table_share= share->table->s; + TABLE* table= share->table; + + /* + Invalidate table and all its indexes + */ + ndb->setDatabaseName(share->table->s->db.str); + Thd_ndb *thd_ndb= get_thd_ndb(thd); + DBUG_ASSERT(thd_ndb != NULL); + Ndb* old_ndb= thd_ndb->ndb; + thd_ndb->ndb= ndb; + ha_ndbcluster table_handler(table_share); + table_handler.set_dbname(share->key); + table_handler.set_tabname(share->key); + table_handler.open_indexes(ndb, table, TRUE); + table_handler.invalidate_dictionary_cache(TRUE); + thd_ndb->ndb= old_ndb; + + if (online_alter_table) + { + char key[FN_REFLEN]; + const void *data= 0, *pack_data= 0; + uint length, pack_length; + int error; + NDBDICT *dict= ndb->getDictionary(); + const NDBTAB *altered_table= pOp->getTable(); + + DBUG_PRINT("info", ("Detected frm change of table %s.%s", + dbname, tabname)); + build_table_filename(key, FN_LEN-1, dbname, tabname, NullS); + /* + If the frm of the altered table differs from the one on + disk then overwrite it with the new table definition + */ + if (readfrm(key, &data, &length) == 0 && + packfrm(data, length, &pack_data, &pack_length) == 0 && + cmp_frm(altered_table, pack_data, pack_length)) + { + DBUG_DUMP("frm", (char*)altered_table->getFrmData(), + altered_table->getFrmLength()); + pthread_mutex_lock(&LOCK_open); + const NDBTAB *old= dict->getTable(tabname); + if (!old || + old->getObjectVersion() != altered_table->getObjectVersion()) + dict->putTable(altered_table); + + if ((error= unpackfrm(&data, &length, altered_table->getFrmData())) || + (error= writefrm(key, data, length))) + { + sql_print_information("NDB: Failed write frm for %s.%s, error %d", + dbname, tabname, error); + } + close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, TRUE); + if ((error= ndbcluster_binlog_open_table(thd, share, + table_share, table))) + sql_print_information("NDB: Failed to re-open table %s.%s", + dbname, tabname); + pthread_mutex_unlock(&LOCK_open); + } + } + remote_drop_table= 1; + } + + // If only the frm was changed, continue replicating + if (online_alter_table) + { + /* Signal ha_ndbcluster::alter_table that drop is done */ + (void) pthread_cond_signal(&injector_cond); + DBUG_RETURN(0); + } + + (void) pthread_mutex_lock(&share->mutex); + DBUG_ASSERT(share->op == pOp || share->op_old == pOp); + if (share->op_old == pOp) + share->op_old= 0; + else + share->op= 0; + // either just us or drop
table handling as well + + /* Signal ha_ndbcluster::delete/rename_table that drop is done */ + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_cond_signal(&injector_cond); + + pthread_mutex_lock(&ndbcluster_mutex); + free_share(&share, TRUE); + if (remote_drop_table && share && share->state != NSS_DROPPED) + { + DBUG_PRINT("info", ("remote drop table")); + if (share->use_count != 1) + do_close_cached_tables= 1; + share->state= NSS_DROPPED; + free_share(&share, TRUE); + } + pthread_mutex_unlock(&ndbcluster_mutex); + + share= 0; + pOp->setCustomData(0); + + pthread_mutex_lock(&injector_mutex); + injector_ndb->dropEventOperation(pOp); + pOp= 0; + pthread_mutex_unlock(&injector_mutex); + + if (do_close_cached_tables) + close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0); + return 0; +} + +static int +ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb, + NdbEventOperation *pOp, + List<Cluster_replication_schema> + *post_epoch_log_list, + List<Cluster_replication_schema> + *post_epoch_unlock_list, + MEM_ROOT *mem_root) +{ + DBUG_ENTER("ndb_binlog_thread_handle_schema_event"); + NDB_SHARE *share= (NDB_SHARE *)pOp->getCustomData(); + if (share && schema_share == share) + { + NDBEVENT::TableEvent ev_type= pOp->getEventType(); + DBUG_PRINT("enter", ("%s.%s ev_type: %d", + share->db, share->table_name, ev_type)); + switch (ev_type) + { + case NDBEVENT::TE_UPDATE: + /* fall through */ + case NDBEVENT::TE_INSERT: + { + Cluster_replication_schema *schema= (Cluster_replication_schema *) + sql_alloc(sizeof(Cluster_replication_schema)); + MY_BITMAP slock; + bitmap_init(&slock, schema->slock, 8*SCHEMA_SLOCK_SIZE, false); + uint node_id= g_ndb_cluster_connection->node_id(); + ndbcluster_get_schema(share->table, schema); + if (schema->node_id != node_id) + { + int log_query= 0; + DBUG_PRINT("info", ("log query_length: %d query: '%s'", + schema->query_length, schema->query)); + switch ((enum SCHEMA_OP_TYPE)schema->type) + { + case SOT_DROP_TABLE: + /* binlog dropping table after any table operations */ + if (ndb_binlog_running) + post_epoch_log_list->push_back(schema, mem_root); + log_query= 0; + break; + case SOT_RENAME_TABLE: + /* fall through */ + case SOT_ALTER_TABLE: + if (ndb_binlog_running) + { + log_query= 1; + break; /* discovery will be handled by binlog */ + } + /* fall through */ + case SOT_CREATE_TABLE: + pthread_mutex_lock(&LOCK_open); + if (ndb_create_table_from_engine(thd, schema->db, schema->name)) + { + sql_print_error("Could not discover table '%s.%s' from " + "binlog schema event '%s' from node %d", + schema->db, schema->name, schema->query, + schema->node_id); + } + pthread_mutex_unlock(&LOCK_open); + log_query= 1; + break; + case SOT_DROP_DB: + run_query(thd, schema->query, + schema->query + schema->query_length, + TRUE, /* print error */ + TRUE); /* don't binlog the query */ + /* binlog dropping database after any table operations */ + if (ndb_binlog_running) + post_epoch_log_list->push_back(schema, mem_root); + log_query= 0; + break; + case SOT_CREATE_DB: + /* fall through */ + case SOT_ALTER_DB: + run_query(thd, schema->query, + schema->query + schema->query_length, + TRUE, /* print error */ + FALSE); /* binlog the query */ + log_query= 0; + break; + case SOT_CLEAR_SLOCK: + { + char key[FN_REFLEN]; + build_table_filename(key, sizeof(key), + schema->db, schema->name, ""); + NDB_SHARE *share= get_share(key, 0, false, false); + if (share) + { + pthread_mutex_lock(&share->mutex); + memcpy(share->slock, schema->slock, sizeof(share->slock)); + 
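
Note the two post-epoch lists threaded through this handler: operations that must not take effect mid-epoch (binlogging a DROP, unlocking after an ALTER) are queued and drained only once the whole epoch has been processed. The idea in miniature, with std::vector standing in for the server's MEM_ROOT-backed List<> (a sketch, not the patch's code):

#include <cstdio>
#include <string>
#include <vector>

struct Schema_op { std::string query; };

int main()
{
  std::vector<Schema_op> post_epoch_log;      // deferred work
  // during the epoch: queue instead of acting immediately
  Schema_op op;
  op.query= "drop table `t1`";
  post_epoch_log.push_back(op);
  // after the epoch: drain in arrival order
  for (size_t i = 0; i < post_epoch_log.size(); i++)
    std::printf("binlog now: %s\n", post_epoch_log[i].query.c_str());
  return 0;
}
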
DBUG_DUMP("share->slock_bitmap.bitmap", + (char*)share->slock_bitmap.bitmap, + no_bytes_in_map(&share->slock_bitmap)); + pthread_mutex_unlock(&share->mutex); + pthread_cond_signal(&injector_cond); + free_share(&share); + } + DBUG_RETURN(0); + } + case SOT_TABLESPACE: + case SOT_LOGFILE_GROUP: + log_query= 1; + break; + } + + /* signal that schema operation has been handled */ + if ((enum SCHEMA_OP_TYPE)schema->type != SOT_CLEAR_SLOCK) + { + DBUG_DUMP("slock", (char*)schema->slock, schema->slock_length); + if (bitmap_is_set(&slock, node_id)) + { + /* + If it is an SOT_ALTER_TABLE we need to acknowledge the + schema operation _after_ all the events have been + processed so that all schema events coming through + the event operation has been processed + */ + if ((enum SCHEMA_OP_TYPE)schema->type == SOT_ALTER_TABLE) + post_epoch_unlock_list->push_back(schema, mem_root); + else + ndbcluster_update_slock(thd, schema->db, schema->name); + } + } + + if (log_query && ndb_binlog_running) + { + char *thd_db_save= thd->db; + thd->db= schema->db; + thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query, + schema->query_length, FALSE, + schema->name[0] == 0 || thd->db[0] == 0); + thd->db= thd_db_save; + } + } + } + break; + case NDBEVENT::TE_DELETE: + // skip + break; + case NDBEVENT::TE_ALTER: + if (pOp->tableNameChanged()) + { + DBUG_PRINT("info", ("Detected name change of table %s.%s", + share->db, share->table_name)); + /* do the rename of the table in the share */ + share->table->s->db.str= share->db; + share->table->s->db.length= strlen(share->db); + share->table->s->table_name.str= share->table_name; + share->table->s->table_name.length= strlen(share->table_name); + } + ndb_handle_schema_change(thd, ndb, pOp, share); + break; + case NDBEVENT::TE_CLUSTER_FAILURE: + case NDBEVENT::TE_DROP: + free_share(&schema_share); + schema_share= 0; + ndb_handle_schema_change(thd, ndb, pOp, share); + break; + case NDBEVENT::TE_NODE_FAILURE: + { + uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; + DBUG_ASSERT(node_id != 0xFF); + (void) pthread_mutex_lock(&share->mutex); + bitmap_clear_all(&share->subscriber_bitmap[node_id]); + DBUG_PRINT("info",("NODE_FAILURE UNSUBSCRIBE[%d]", node_id)); + if (ndb_extra_logging) + { + sql_print_information("NDB Binlog: Node: %d, down," + " Subscriber bitmask %x%x", + pOp->getNdbdNodeId(), + share->subscriber_bitmap[node_id].bitmap[1], + share->subscriber_bitmap[node_id].bitmap[0]); + } + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_cond_signal(&injector_cond); + break; + } + case NDBEVENT::TE_SUBSCRIBE: + { + uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; + uint8 req_id= pOp->getReqNodeId(); + DBUG_ASSERT(req_id != 0 && node_id != 0xFF); + (void) pthread_mutex_lock(&share->mutex); + bitmap_set_bit(&share->subscriber_bitmap[node_id], req_id); + DBUG_PRINT("info",("SUBSCRIBE[%d] %d", node_id, req_id)); + if (ndb_extra_logging) + { + sql_print_information("NDB Binlog: Node: %d, subscribe from node %d," + " Subscriber bitmask %x%x", + pOp->getNdbdNodeId(), + req_id, + share->subscriber_bitmap[node_id].bitmap[1], + share->subscriber_bitmap[node_id].bitmap[0]); + } + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_cond_signal(&injector_cond); + break; + } + case NDBEVENT::TE_UNSUBSCRIBE: + { + uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; + uint8 req_id= pOp->getReqNodeId(); + DBUG_ASSERT(req_id != 0 && node_id != 0xFF); + (void) pthread_mutex_lock(&share->mutex); + bitmap_clear_bit(&share->subscriber_bitmap[node_id], req_id); + 
DBUG_PRINT("info",("UNSUBSCRIBE[%d] %d", node_id, req_id)); + if (ndb_extra_logging) + { + sql_print_information("NDB Binlog: Node: %d, unsubscribe from node %d," + " Subscriber bitmask %x%x", + pOp->getNdbdNodeId(), + req_id, + share->subscriber_bitmap[node_id].bitmap[1], + share->subscriber_bitmap[node_id].bitmap[0]); + } + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_cond_signal(&injector_cond); + break; + } + default: + sql_print_error("NDB Binlog: unknown non data event %d for %s. " + "Ignoring...", (unsigned) ev_type, share->key); + } + } + DBUG_RETURN(0); +} + +/* + Timer class for doing performance measurements +*/ + +/********************************************************************* + Internal helper functions for handeling of the cluster replication tables + - binlog_index + - apply_status +*********************************************************************/ + +/* + struct to hold the data to be inserted into the + binlog_index table +*/ +struct Binlog_index_row { + ulonglong gci; + const char *master_log_file; + ulonglong master_log_pos; + ulonglong n_inserts; + ulonglong n_updates; + ulonglong n_deletes; + ulonglong n_schemaops; +}; + +/* + Open the binlog_index table +*/ +static int open_binlog_index(THD *thd, TABLE_LIST *tables, + TABLE **binlog_index) +{ + static char repdb[]= NDB_REP_DB; + static char reptable[]= NDB_REP_TABLE; + const char *save_proc_info= thd->proc_info; + + bzero((char*) tables, sizeof(*tables)); + tables->db= repdb; + tables->alias= tables->table_name= reptable; + tables->lock_type= TL_WRITE; + thd->proc_info= "Opening " NDB_REP_DB "." NDB_REP_TABLE; + tables->required_type= FRMTYPE_TABLE; + uint counter; + thd->clear_error(); + if (open_tables(thd, &tables, &counter, MYSQL_LOCK_IGNORE_FLUSH)) + { + sql_print_error("NDB Binlog: Opening binlog_index: %d, '%s'", + thd->net.last_errno, + thd->net.last_error ? 
thd->net.last_error : ""); + thd->proc_info= save_proc_info; + return -1; + } + *binlog_index= tables->table; + thd->proc_info= save_proc_info; + return 0; +} + +/* + Insert one row in the binlog_index + + declared friend in handler.h to be able to call write_row directly + so that this insert is not replicated +*/ +int ndb_add_binlog_index(THD *thd, void *_row) +{ + Binlog_index_row &row= *(Binlog_index_row *) _row; + int error= 0; + bool need_reopen; + for ( ; ; ) /* loop for need_reopen */ + { + if (!binlog_index && open_binlog_index(thd, &binlog_tables, &binlog_index)) + { + error= -1; + goto add_binlog_index_err; + } + + if (lock_tables(thd, &binlog_tables, 1, &need_reopen)) + { + if (need_reopen) + { + TABLE_LIST *p_binlog_tables= &binlog_tables; + close_tables_for_reopen(thd, &p_binlog_tables); + binlog_index= 0; + continue; + } + sql_print_error("NDB Binlog: Unable to lock table binlog_index"); + error= -1; + goto add_binlog_index_err; + } + break; + } + + binlog_index->field[0]->store(row.master_log_pos); + binlog_index->field[1]->store(row.master_log_file, + strlen(row.master_log_file), + &my_charset_bin); + binlog_index->field[2]->store(row.gci); + binlog_index->field[3]->store(row.n_inserts); + binlog_index->field[4]->store(row.n_updates); + binlog_index->field[5]->store(row.n_deletes); + binlog_index->field[6]->store(row.n_schemaops); + + int r; + if ((r= binlog_index->file->write_row(binlog_index->record[0]))) + { + sql_print_error("NDB Binlog: Writing row to binlog_index: %d", r); + error= -1; + goto add_binlog_index_err; + } + + mysql_unlock_tables(thd, thd->lock); + thd->lock= 0; + return 0; +add_binlog_index_err: + close_thread_tables(thd); + binlog_index= 0; + return error; +} + +/********************************************************************* + Functions for start, stop, wait for ndbcluster binlog thread +*********************************************************************/ + +static int do_ndbcluster_binlog_close_connection= 0; + +int ndbcluster_binlog_start() +{ + DBUG_ENTER("ndbcluster_binlog_start"); + + pthread_mutex_init(&injector_mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&injector_cond, NULL); + + /* Create injector thread */ + if (pthread_create(&ndb_binlog_thread, &connection_attrib, + ndb_binlog_thread_func, 0)) + { + DBUG_PRINT("error", ("Could not create ndb injector thread")); + pthread_cond_destroy(&injector_cond); + pthread_mutex_destroy(&injector_mutex); + DBUG_RETURN(-1); + } + + /* + Wait for the ndb injector thread to finish starting up. 
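
ndbcluster_binlog_start above blocks on injector_cond until the injector thread reports success (1) or failure (-1) through ndb_binlog_thread_running. The same startup handshake in portable C++, with a tri-state `running` flag standing in for that variable (a sketch under those assumptions):

#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
int running = 0;                 // 0 = starting, 1 = up, -1 = failed

void injector_main()
{
  // ... acquire resources, create Ndb objects, etc. ...
  {
    std::lock_guard<std::mutex> g(m);
    running = 1;                 // or -1 if setup failed
  }
  cv.notify_all();               // wake the starter
}

int start_injector()
{
  std::thread t(injector_main);
  t.detach();                    // detached, as the patch's thread is
  std::unique_lock<std::mutex> lk(m);
  while (running == 0)
    cv.wait(lk);
  return running < 0 ? -1 : 0;   // mirrors ndbcluster_binlog_start
}
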
+ */ + pthread_mutex_lock(&injector_mutex); + while (!ndb_binlog_thread_running) + pthread_cond_wait(&injector_cond, &injector_mutex); + pthread_mutex_unlock(&injector_mutex); + + if (ndb_binlog_thread_running < 0) + DBUG_RETURN(-1); + + DBUG_RETURN(0); +} + +static void ndbcluster_binlog_close_connection(THD *thd) +{ + DBUG_ENTER("ndbcluster_binlog_close_connection"); + const char *save_info= thd->proc_info; + thd->proc_info= "ndbcluster_binlog_close_connection"; + do_ndbcluster_binlog_close_connection= 1; + while (ndb_binlog_thread_running > 0) + sleep(1); + thd->proc_info= save_info; + DBUG_VOID_RETURN; +} + +/************************************************************** + Internal helper functions for creating/dropping ndb events + used by the client sql threads +**************************************************************/ +void +ndb_rep_event_name(String *event_name,const char *db, const char *tbl) +{ + event_name->set_ascii("REPL$", 5); + event_name->append(db); + if (tbl) + { + event_name->append('/'); + event_name->append(tbl); + } +} + +/* + Common function for setting up everything for logging a table at + create/discover. +*/ +int ndbcluster_create_binlog_setup(Ndb *ndb, const char *key, + uint key_len, + const char *db, + const char *table_name, + my_bool share_may_exist) +{ + int do_event_op= ndb_binlog_running; + DBUG_ENTER("ndbcluster_create_binlog_setup"); + DBUG_PRINT("enter",("key: %s key_len: %d %s.%s share_may_exist: %d", + key, key_len, db, table_name, share_may_exist)); + DBUG_ASSERT(! IS_NDB_BLOB_PREFIX(table_name)); + DBUG_ASSERT(strlen(key) == key_len); + + pthread_mutex_lock(&ndbcluster_mutex); + + /* Handle any trailing share */ + NDB_SHARE *share= (NDB_SHARE*) hash_search(&ndbcluster_open_tables, + (byte*) key, key_len); + + if (share && share_may_exist) + { + if (share->flags & NSF_NO_BINLOG || + share->op != 0 || + share->op_old != 0) + { + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); // replication already setup, or should not + } + } + + if (share) + { + if (share->op || share->op_old) + { + my_errno= HA_ERR_TABLE_EXIST; + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(1); + } + handle_trailing_share(share); + } + + /* Create share which is needed to hold replication information */ + if (!(share= get_share(key, 0, true, true))) + { + sql_print_error("NDB Binlog: " + "allocating table share for %s failed", key); + } + + if (!schema_share && + strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) + do_event_op= 1; + + if (!do_event_op) + { + share->flags|= NSF_NO_BINLOG; + pthread_mutex_unlock(&ndbcluster_mutex); + DBUG_RETURN(0); + } + pthread_mutex_unlock(&ndbcluster_mutex); + + while (share && !IS_TMP_PREFIX(table_name)) + { + /* + ToDo make sanity check of share so that the table is actually the same + I.e. 
we need to do open file from frm in this case + Currently awaiting this to be fixed in the 4.1 tree in the general + case + */ + + /* Create the event in NDB */ + ndb->setDatabaseName(db); + + NDBDICT *dict= ndb->getDictionary(); + const NDBTAB *ndbtab= dict->getTable(table_name); + if (ndbtab == 0) + { + if (ndb_extra_logging) + sql_print_information("NDB Binlog: Failed to get table %s from ndb: " + "%s, %d", key, dict->getNdbError().message, + dict->getNdbError().code); + break; // error + } + String event_name(INJECTOR_EVENT_LEN); + ndb_rep_event_name(&event_name, db, table_name); + /* + event should have been created by someone else, + but let's make sure, and create if it doesn't exist + */ + if (!dict->getEvent(event_name.c_ptr())) + { + if (ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share)) + { + sql_print_error("NDB Binlog: " + "FAILED CREATE (DISCOVER) TABLE Event: %s", + event_name.c_ptr()); + break; // error + } + if (ndb_extra_logging) + sql_print_information("NDB Binlog: " + "CREATE (DISCOVER) TABLE Event: %s", + event_name.c_ptr()); + } + else + if (ndb_extra_logging) + sql_print_information("NDB Binlog: DISCOVER TABLE Event: %s", + event_name.c_ptr()); + + /* + create the event operations for receiving logging events + */ + if (ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr())) + { + sql_print_error("NDB Binlog:" + "FAILED CREATE (DISCOVER) EVENT OPERATIONS Event: %s", + event_name.c_ptr()); + /* a warning has been issued to the client */ + DBUG_RETURN(0); + } + DBUG_RETURN(0); + } + DBUG_RETURN(-1); +} + +int +ndbcluster_create_event(Ndb *ndb, const NDBTAB *ndbtab, + const char *event_name, NDB_SHARE *share, + int push_warning) +{ + DBUG_ENTER("ndbcluster_create_event"); + DBUG_PRINT("info", ("table=%s version=%d event=%s share=%s", + ndbtab->getName(), ndbtab->getObjectVersion(), + event_name, share ? share->key : "(nil)")); + DBUG_ASSERT(! 
IS_NDB_BLOB_PREFIX(ndbtab->getName())); + if (!share) + { + DBUG_PRINT("info", ("share == NULL")); + DBUG_RETURN(0); + } + if (share->flags & NSF_NO_BINLOG) + { + DBUG_PRINT("info", ("share->flags & NSF_NO_BINLOG, flags: %x %d", share->flags, share->flags & NSF_NO_BINLOG)); + DBUG_RETURN(0); + } + + NDBDICT *dict= ndb->getDictionary(); + NDBEVENT my_event(event_name); + my_event.setTable(*ndbtab); + my_event.addTableEvent(NDBEVENT::TE_ALL); + if (share->flags & NSF_HIDDEN_PK) + { + if (share->flags & NSF_BLOB_FLAG) + { + sql_print_error("NDB Binlog: logging of table %s " + "with BLOB attribute and no PK is not supported", + share->key); + if (push_warning) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_ILLEGAL_HA_CREATE_OPTION, ER(ER_ILLEGAL_HA_CREATE_OPTION), + ndbcluster_hton.name, + "Binlog of table with BLOB attribute and no PK"); + + share->flags|= NSF_NO_BINLOG; + DBUG_RETURN(-1); + } + /* No primary key, subscribe for all attributes */ + my_event.setReport(NDBEVENT::ER_ALL); + DBUG_PRINT("info", ("subscription all")); + } + else + { + if (schema_share || strcmp(share->db, NDB_REP_DB) || + strcmp(share->table_name, NDB_SCHEMA_TABLE)) + { + my_event.setReport(NDBEVENT::ER_UPDATED); + DBUG_PRINT("info", ("subscription only updated")); + } + else + { + my_event.setReport((NDBEVENT::EventReport) + (NDBEVENT::ER_ALL | NDBEVENT::ER_SUBSCRIBE)); + DBUG_PRINT("info", ("subscription all and subscribe")); + } + } + if (share->flags & NSF_BLOB_FLAG) + my_event.mergeEvents(true); + + /* add all columns to the event */ + int n_cols= ndbtab->getNoOfColumns(); + for(int a= 0; a < n_cols; a++) + my_event.addEventColumn(a); + + if (dict->createEvent(my_event)) // Add event to database + { + if (dict->getNdbError().classification != NdbError::SchemaObjectExists) + { + /* + failed, print a warning + */ + if (push_warning) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + dict->getNdbError().code, + dict->getNdbError().message, "NDB"); + sql_print_error("NDB Binlog: Unable to create event in database. " + "Event: %s Error Code: %d Message: %s", event_name, + dict->getNdbError().code, dict->getNdbError().message); + DBUG_RETURN(-1); + } + + /* + trailing event from before; an error, but try to correct it + */ + if (dict->dropEvent(my_event.getName())) + { + if (push_warning) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + dict->getNdbError().code, + dict->getNdbError().message, "NDB"); + sql_print_error("NDB Binlog: Unable to create event in database. " + " Attempt to correct with drop failed. " + "Event: %s Error Code: %d Message: %s", + event_name, + dict->getNdbError().code, + dict->getNdbError().message); + DBUG_RETURN(-1); + } + + /* + try to add the event again + */ + if (dict->createEvent(my_event)) + { + if (push_warning) + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + dict->getNdbError().code, + dict->getNdbError().message, "NDB"); + sql_print_error("NDB Binlog: Unable to create event in database. " + " Attempt to correct with drop ok, but create failed. 
" + "Event: %s Error Code: %d Message: %s", + event_name, + dict->getNdbError().code, + dict->getNdbError().message); + DBUG_RETURN(-1); + } +#ifdef NDB_BINLOG_EXTRA_WARNINGS + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + 0, "NDB Binlog: Removed trailing event", + "NDB"); +#endif + } + + DBUG_RETURN(0); +} + +inline int is_ndb_compatible_type(Field *field) +{ + return + !(field->flags & BLOB_FLAG) && + field->type() != MYSQL_TYPE_BIT && + field->pack_length() != 0; +} + +/* + - create eventOperations for receiving log events + - setup ndb recattrs for reception of log event data + - "start" the event operation + + used at create/discover of tables +*/ +int +ndbcluster_create_event_ops(NDB_SHARE *share, const NDBTAB *ndbtab, + const char *event_name) +{ + /* + we are in either create table or rename table so table should be + locked, hence we can work with the share without locks + */ + + DBUG_ENTER("ndbcluster_create_event_ops"); + DBUG_ASSERT(! IS_NDB_BLOB_PREFIX(ndbtab->getName())); + + DBUG_ASSERT(share != 0); + + if (share->flags & NSF_NO_BINLOG) + { + DBUG_PRINT("info", ("share->flags & NSF_NO_BINLOG, flags: %x", share->flags)); + DBUG_RETURN(0); + } + + if (!binlog_filter->db_ok(share->db)) + { + share->flags|= NSF_NO_BINLOG; + DBUG_RETURN(0); + } + + if (share->op) + { + assert(share->op->getCustomData() == (void *) share); + + DBUG_ASSERT(share->use_count > 1); + sql_print_error("NDB Binlog: discover reusing old ev op"); + free_share(&share); // old event op already has reference + DBUG_RETURN(0); + } + + TABLE *table= share->table; + + int do_schema_share= 0, do_apply_status_share= 0; + int retries= 100; + if (!schema_share && strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) + do_schema_share= 1; + else if (!apply_status_share && strcmp(share->db, NDB_REP_DB) == 0 && + strcmp(share->table_name, NDB_APPLY_TABLE) == 0) + do_apply_status_share= 1; + + while (1) + { + pthread_mutex_lock(&injector_mutex); + Ndb *ndb= injector_ndb; + if (do_schema_share) + ndb= schema_ndb; + + if (ndb == 0) + { + pthread_mutex_unlock(&injector_mutex); + DBUG_RETURN(-1); + } + + NdbEventOperation* op; + if (do_schema_share) + op= ndb->createEventOperation(event_name); + else + { + // set injector_ndb database/schema from table internal name + int ret= ndb->setDatabaseAndSchemaName(ndbtab); + assert(ret == 0); + op= ndb->createEventOperation(event_name); + // reset to catch errors + ndb->setDatabaseName(""); + } + if (!op) + { + pthread_mutex_unlock(&injector_mutex); + sql_print_error("NDB Binlog: Creating NdbEventOperation failed for" + " %s",event_name); + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + ndb->getNdbError().code, + ndb->getNdbError().message, + "NDB"); + DBUG_RETURN(-1); + } + + if (share->flags & NSF_BLOB_FLAG) + op->mergeEvents(true); // currently not inherited from event + + int n_columns= ndbtab->getNoOfColumns(); + int n_fields= table ? table->s->fields : 0; // XXX ??? + for (int j= 0; j < n_columns; j++) + { + const char *col_name= ndbtab->getColumn(j)->getName(); + NdbValue attr0, attr1; + if (j < n_fields) + { + Field *f= share->table->field[j]; + if (is_ndb_compatible_type(f)) + { + DBUG_PRINT("info", ("%s compatible", col_name)); + attr0.rec= op->getValue(col_name, f->ptr); + attr1.rec= op->getPreValue(col_name, + (f->ptr - share->table->record[0]) + + share->table->record[1]); + } + else if (! 
(f->flags & BLOB_FLAG)) + { + DBUG_PRINT("info", ("%s non compatible", col_name)); + attr0.rec= op->getValue(col_name); + attr1.rec= op->getPreValue(col_name); + } + else + { + DBUG_PRINT("info", ("%s blob", col_name)); + attr0.blob= op->getBlobHandle(col_name); + attr1.blob= op->getPreBlobHandle(col_name); + if (attr0.blob == NULL || attr1.blob == NULL) + { + sql_print_error("NDB Binlog: Creating NdbEventOperation" + " blob field %u handles failed (code=%d) for %s", + j, op->getNdbError().code, event_name); + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + op->getNdbError().code, + op->getNdbError().message, + "NDB"); + ndb->dropEventOperation(op); + pthread_mutex_unlock(&injector_mutex); + DBUG_RETURN(-1); + } + } + } + else + { + DBUG_PRINT("info", ("%s hidden key", col_name)); + attr0.rec= op->getValue(col_name); + attr1.rec= op->getPreValue(col_name); + } + share->ndb_value[0][j].ptr= attr0.ptr; + share->ndb_value[1][j].ptr= attr1.ptr; + } + op->setCustomData((void *) share); // set before execute + share->op= op; // assign op in NDB_SHARE + if (op->execute()) + { + share->op= NULL; + retries--; + if (op->getNdbError().status != NdbError::TemporaryError && + op->getNdbError().code != 1407) + retries= 0; + if (retries == 0) + { + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + op->getNdbError().code, op->getNdbError().message, + "NDB"); + sql_print_error("NDB Binlog: ndbevent->execute failed for %s; %d %s", + event_name, + op->getNdbError().code, op->getNdbError().message); + } + ndb->dropEventOperation(op); + pthread_mutex_unlock(&injector_mutex); + if (retries) + continue; + DBUG_RETURN(-1); + } + pthread_mutex_unlock(&injector_mutex); + break; + } + + get_share(share); + if (do_apply_status_share) + apply_status_share= get_share(share); + else if (do_schema_share) + schema_share= get_share(share); + + DBUG_PRINT("info",("%s share->op: 0x%lx, share->use_count: %u", + share->key, share->op, share->use_count)); + + if (ndb_extra_logging) + sql_print_information("NDB Binlog: logging %s", share->key); + DBUG_RETURN(0); +} + +/* + when entering the calling thread should have a share lock id share != 0 + then the injector thread will have one as well, i.e. share->use_count == 0 + (unless it has already dropped... then share->op == 0) +*/ +int +ndbcluster_handle_drop_table(Ndb *ndb, const char *event_name, + NDB_SHARE *share) +{ + DBUG_ENTER("ndbcluster_handle_drop_table"); + + NDBDICT *dict= ndb->getDictionary(); + if (event_name && dict->dropEvent(event_name)) + { + if (dict->getNdbError().code != 4710) + { + /* drop event failed for some reason, issue a warning */ + push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_GET_ERRMSG, ER(ER_GET_ERRMSG), + dict->getNdbError().code, + dict->getNdbError().message, "NDB"); + /* error is not that the event did not exist */ + sql_print_error("NDB Binlog: Unable to drop event in database. " + "Event: %s Error Code: %d Message: %s", + event_name, + dict->getNdbError().code, + dict->getNdbError().message); + /* ToDo; handle error? 
*/ + if (share && share->op && + share->op->getState() == NdbEventOperation::EO_EXECUTING && + dict->getNdbError().code != 4009) + { + DBUG_ASSERT(false); + DBUG_RETURN(-1); + } + } + } + + if (share == 0 || share->op == 0) + { + DBUG_RETURN(0); + } + +/* + Synchronized drop between client thread and injector thread is + necessary in order to maintain ordering in the binlog, + such that the drop occurs _after_ any inserts/updates/deletes. + + The penalty for this is that the drop table becomes slow. + + This wait is however not strictly necessary to produce a binlog + that is usable. However, the slave does not currently handle + these out of order, thus we are keeping the SYNC_DROP_ defined + for now. +*/ +#define SYNC_DROP_ +#ifdef SYNC_DROP_ + (void) pthread_mutex_lock(&share->mutex); + int max_timeout= 10; + while (share->op) + { + struct timespec abstime; + set_timespec(abstime, 1); + (void) pthread_cond_timedwait(&injector_cond, + &share->mutex, + &abstime); + max_timeout--; + if (share->op == 0) + break; + if (max_timeout == 0) + { + sql_print_error("NDB delete table: timed out. Ignoring..."); + break; + } + if (ndb_extra_logging) + sql_print_information("NDB delete table: " + "waiting max %u sec for drop table %s.", + max_timeout, share->key); + } + (void) pthread_mutex_unlock(&share->mutex); +#else + (void) pthread_mutex_lock(&share->mutex); + share->op_old= share->op; + share->op= 0; + (void) pthread_mutex_unlock(&share->mutex); +#endif + + DBUG_RETURN(0); +} + + +/******************************************************************** + Internal helper functions for different events from the storage nodes + used by the ndb injector thread +********************************************************************/ + +/* + Handle error states on events from the storage nodes +*/ +static int ndb_binlog_thread_handle_error(Ndb *ndb, NdbEventOperation *pOp, + Binlog_index_row &row) +{ + NDB_SHARE *share= (NDB_SHARE *)pOp->getCustomData(); + DBUG_ENTER("ndb_binlog_thread_handle_error"); + + int overrun= pOp->isOverrun(); + if (overrun) + { + /* + ToDo: this error should rather clear the binlog_index... + and continue + */ + sql_print_error("NDB Binlog: Overrun in event buffer, " + "this means we have dropped events. Cannot " + "continue binlog for %s", share->key); + pOp->clearError(); + DBUG_RETURN(-1); + } + + if (!pOp->isConsistent()) + { + /* + ToDo: this error should rather clear the binlog_index... + and continue + */ + sql_print_error("NDB Binlog: Not Consistent. Cannot " + "continue binlog for %s.
Error code: %d" + " Message: %s", share->key, + pOp->getNdbError().code, + pOp->getNdbError().message); + pOp->clearError(); + DBUG_RETURN(-1); + } + sql_print_error("NDB Binlog: unhandled error %d for table %s", + pOp->hasError(), share->key); + pOp->clearError(); + DBUG_RETURN(0); +} + +static int +ndb_binlog_thread_handle_non_data_event(Ndb *ndb, NdbEventOperation *pOp, + Binlog_index_row &row) +{ + NDB_SHARE *share= (NDB_SHARE *)pOp->getCustomData(); + NDBEVENT::TableEvent type= pOp->getEventType(); + + /* make sure to flush any pending events as they can be dependent + on one of the tables being changed below + */ + injector_thd->binlog_flush_pending_rows_event(true); + + switch (type) + { + case NDBEVENT::TE_CLUSTER_FAILURE: + if (apply_status_share == share) + { + free_share(&apply_status_share); + apply_status_share= 0; + } + if (ndb_extra_logging) + sql_print_information("NDB Binlog: cluster failure for %s.", share->key); + DBUG_PRINT("info", ("CLUSTER FAILURE EVENT: " + "%s received share: 0x%lx op: %lx share op: %lx " + "op_old: %lx", + share->key, share, pOp, share->op, share->op_old)); + break; + case NDBEVENT::TE_ALTER: + if (pOp->tableNameChanged()) + { + DBUG_PRINT("info", ("Detected name change of table %s.%s", + share->db, share->table_name)); + /* ToDo: remove printout */ + if (ndb_extra_logging) + sql_print_information("NDB Binlog: rename table %s%s/%s -> %s.", + share_prefix, share->table->s->db.str, + share->table->s->table_name.str, + share->key); + /* do the rename of the table in the share */ + share->table->s->db.str= share->db; + share->table->s->db.length= strlen(share->db); + share->table->s->table_name.str= share->table_name; + share->table->s->table_name.length= strlen(share->table_name); + } + goto drop_alter_common; + case NDBEVENT::TE_DROP: + if (apply_status_share == share) + { + free_share(&apply_status_share); + apply_status_share= 0; + } + /* ToDo: remove printout */ + if (ndb_extra_logging) + sql_print_information("NDB Binlog: drop table %s.", share->key); +drop_alter_common: + row.n_schemaops++; + DBUG_PRINT("info", ("TABLE %s EVENT: %s received share: 0x%lx op: %lx " + "share op: %lx op_old: %lx", + type == NDBEVENT::TE_DROP ? "DROP" : "ALTER", + share->key, share, pOp, share->op, share->op_old)); + break; + case NDBEVENT::TE_NODE_FAILURE: + /* fall through */ + case NDBEVENT::TE_SUBSCRIBE: + /* fall through */ + case NDBEVENT::TE_UNSUBSCRIBE: + /* ignore */ + return 0; + default: + sql_print_error("NDB Binlog: unknown non data event %d for %s. " + "Ignoring...", (unsigned) type, share->key); + return 0; + } + + ndb_handle_schema_change(injector_thd, ndb, pOp, share); + return 0; +} + +/* + Handle data events from the storage nodes +*/ +static int +ndb_binlog_thread_handle_data_event(Ndb *ndb, NdbEventOperation *pOp, + Binlog_index_row &row, + injector::transaction &trans) +{ + NDB_SHARE *share= (NDB_SHARE*) pOp->getCustomData(); + if (share == apply_status_share) + return 0; + TABLE *table= share->table; + + assert(table != 0); + + dbug_print_table("table", table); + + TABLE_SHARE *table_s= table->s; + uint n_fields= table_s->fields; + MY_BITMAP b; + /* Potential buffer for the bitmap */ + uint32 bitbuf[128 / (sizeof(uint32) * 8)]; + bitmap_init(&b, n_fields <= sizeof(bitbuf) * 8 ? 
bitbuf : NULL, + n_fields, false); + bitmap_set_all(&b); + + /* + row data is already in table->record[0] + As we told the NdbEventOperation to do this + (saves moving data about many times) + */ + + /* + for now malloc/free blobs buffer each time + TODO if possible share single permanent buffer with handlers + */ + byte* blobs_buffer[2] = { 0, 0 }; + uint blobs_buffer_size[2] = { 0, 0 }; + + switch(pOp->getEventType()) + { + case NDBEVENT::TE_INSERT: + row.n_inserts++; + DBUG_PRINT("info", ("INSERT INTO %s", share->key)); + { + if (share->flags & NSF_BLOB_FLAG) + { + my_ptrdiff_t ptrdiff= 0; + int ret= get_ndb_blobs_value(table, share->ndb_value[0], + blobs_buffer[0], blobs_buffer_size[0], + ptrdiff); + DBUG_ASSERT(ret == 0); + } + ndb_unpack_record(table, share->ndb_value[0], &b, table->record[0]); + trans.write_row(::server_id, injector::transaction::table(table, true), + &b, n_fields, table->record[0]); + } + break; + case NDBEVENT::TE_DELETE: + row.n_deletes++; + DBUG_PRINT("info",("DELETE FROM %s", share->key)); + { + /* + table->record[0] contains only the primary key in this case + since we do not have an after image + */ + int n; + if (table->s->primary_key != MAX_KEY) + n= 0; /* + use the primary key only as it save time and space and + it is the only thing needed to log the delete + */ + else + n= 1; /* + we use the before values since we don't have a primary key + since the mysql server does not handle the hidden primary + key + */ + + if (share->flags & NSF_BLOB_FLAG) + { + my_ptrdiff_t ptrdiff= table->record[n] - table->record[0]; + int ret= get_ndb_blobs_value(table, share->ndb_value[n], + blobs_buffer[n], blobs_buffer_size[n], + ptrdiff); + DBUG_ASSERT(ret == 0); + } + ndb_unpack_record(table, share->ndb_value[n], &b, table->record[n]); + DBUG_EXECUTE("info", print_records(table, table->record[n]);); + trans.delete_row(::server_id, injector::transaction::table(table, true), + &b, n_fields, table->record[n]); + } + break; + case NDBEVENT::TE_UPDATE: + row.n_updates++; + DBUG_PRINT("info", ("UPDATE %s", share->key)); + { + if (share->flags & NSF_BLOB_FLAG) + { + my_ptrdiff_t ptrdiff= 0; + int ret= get_ndb_blobs_value(table, share->ndb_value[0], + blobs_buffer[0], blobs_buffer_size[0], + ptrdiff); + DBUG_ASSERT(ret == 0); + } + ndb_unpack_record(table, share->ndb_value[0], + &b, table->record[0]); + DBUG_EXECUTE("info", print_records(table, table->record[0]);); + if (table->s->primary_key != MAX_KEY) + { + /* + since table has a primary key, we can do a write + using only after values + */ + trans.write_row(::server_id, injector::transaction::table(table, true), + &b, n_fields, table->record[0]);// after values + } + else + { + /* + mysql server cannot handle the ndb hidden key and + therefore needs the before image as well + */ + if (share->flags & NSF_BLOB_FLAG) + { + my_ptrdiff_t ptrdiff= table->record[1] - table->record[0]; + int ret= get_ndb_blobs_value(table, share->ndb_value[1], + blobs_buffer[1], blobs_buffer_size[1], + ptrdiff); + DBUG_ASSERT(ret == 0); + } + ndb_unpack_record(table, share->ndb_value[1], &b, table->record[1]); + DBUG_EXECUTE("info", print_records(table, table->record[1]);); + trans.update_row(::server_id, + injector::transaction::table(table, true), + &b, n_fields, + table->record[1], // before values + table->record[0]);// after values + } + } + break; + default: + /* We should REALLY never get here. 
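
The record-image selection in ndb_binlog_thread_handle_data_event boils down to one rule: with a user-defined primary key, the key alone (record[0]) is enough to log a DELETE, while NDB's hidden key forces use of the full before image (record[1]). As a one-line helper (a sketch; MAX_KEY is the server's marker for "no primary key"):

// 0: log the primary key only; 1: log the full before image
static inline int delete_image_index(unsigned primary_key,
                                     unsigned max_key)
{
  return primary_key != max_key ? 0 : 1;
}
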
*/ + DBUG_PRINT("info", ("default - uh oh, a brain exploded.")); + break; + } + + if (share->flags & NSF_BLOB_FLAG) + { + my_free(blobs_buffer[0], MYF(MY_ALLOW_ZERO_PTR)); + my_free(blobs_buffer[1], MYF(MY_ALLOW_ZERO_PTR)); + } + + return 0; +} + +//#define RUN_NDB_BINLOG_TIMER +#ifdef RUN_NDB_BINLOG_TIMER +class Timer +{ +public: + Timer() { start(); } + void start() { gettimeofday(&m_start, 0); } + void stop() { gettimeofday(&m_stop, 0); } + ulong elapsed_ms() + { + return (ulong) + (((longlong) m_stop.tv_sec - (longlong) m_start.tv_sec) * 1000 + + ((longlong) m_stop.tv_usec - + (longlong) m_start.tv_usec + 999) / 1000); + } +private: + struct timeval m_start,m_stop; +}; +#endif + +/**************************************************************** + Injector thread main loop +****************************************************************/ + +pthread_handler_t ndb_binlog_thread_func(void *arg) +{ + THD *thd; /* needs to be first for thread_stack */ + Ndb *ndb= 0; + Thd_ndb *thd_ndb=0; + int ndb_update_binlog_index= 1; + injector *inj= injector::instance(); + + pthread_mutex_lock(&injector_mutex); + /* + Set up the Thread + */ + my_thread_init(); + DBUG_ENTER("ndb_binlog_thread"); + + thd= new THD; /* note that constructor of THD uses DBUG_ */ + THD_CHECK_SENTRY(thd); + + thd->thread_stack= (char*) &thd; /* remember where our stack is */ + if (thd->store_globals()) + { + thd->cleanup(); + delete thd; + ndb_binlog_thread_running= -1; + pthread_mutex_unlock(&injector_mutex); + pthread_cond_signal(&injector_cond); + my_thread_end(); + pthread_exit(0); + DBUG_RETURN(NULL); + } + + thd->init_for_queries(); + thd->command= COM_DAEMON; + thd->system_thread= SYSTEM_THREAD_NDBCLUSTER_BINLOG; + thd->version= refresh_version; + thd->set_time(); + thd->main_security_ctx.host_or_ip= ""; + thd->client_capabilities= 0; + my_net_init(&thd->net, 0); + thd->main_security_ctx.master_access= ~0; + thd->main_security_ctx.priv_user= 0; + + /* + Set up ndb binlog + */ + sql_print_information("Starting MySQL Cluster Binlog Thread"); + + pthread_detach_this_thread(); + thd->real_id= pthread_self(); + pthread_mutex_lock(&LOCK_thread_count); + thd->thread_id= thread_id++; + threads.append(thd); + pthread_mutex_unlock(&LOCK_thread_count); + thd->lex->start_transaction_opt= 0; + + if (!(schema_ndb= new Ndb(g_ndb_cluster_connection, "")) || + schema_ndb->init()) + { + sql_print_error("NDB Binlog: Getting Schema Ndb object failed"); + goto err; + } + + // empty database + if (!(ndb= new Ndb(g_ndb_cluster_connection, "")) || + ndb->init()) + { + sql_print_error("NDB Binlog: Getting Ndb object failed"); + ndb_binlog_thread_running= -1; + pthread_mutex_unlock(&injector_mutex); + pthread_cond_signal(&injector_cond); + goto err; + } + + /* + Expose global reference to our ndb object. + + Used by both sql client thread and binlog thread to interact + with the storage + pthread_mutex_lock(&injector_mutex); + */ + injector_thd= thd; + injector_ndb= ndb; + ndb_binlog_thread_running= 1; + if (opt_bin_log) + { + if (global_system_variables.binlog_format == BINLOG_FORMAT_ROW) + { + ndb_binlog_running= TRUE; + } + else + { + sql_print_error("NDB: only row based binary logging is supported"); + } + } + /* + We signal the thread that started us that we've finished + starting up.
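+ + As a sketch of the waiting side (an assumption, since the starting thread is not shown in this file; it is taken to block on the same mutex/cond pair): + + pthread_mutex_lock(&injector_mutex); + while (ndb_binlog_thread_running == 0) + pthread_cond_wait(&injector_cond, &injector_mutex); + pthread_mutex_unlock(&injector_mutex); + + where ndb_binlog_thread_running == -1 means that startup failed.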
+ */ + pthread_mutex_unlock(&injector_mutex); + pthread_cond_signal(&injector_cond); + + thd->proc_info= "Waiting for ndbcluster to start"; + + pthread_mutex_lock(&injector_mutex); + while (!ndbcluster_util_inited) + { + /* ndb not connected yet */ + struct timespec abstime; + set_timespec(abstime, 1); + pthread_cond_timedwait(&injector_cond, &injector_mutex, &abstime); + if (abort_loop) + { + pthread_mutex_unlock(&injector_mutex); + goto err; + } + } + pthread_mutex_unlock(&injector_mutex); + + /* + Main NDB Injector loop + */ + + DBUG_ASSERT(ndbcluster_hton.slot != ~(uint)0); + if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) + { + sql_print_error("Could not allocate Thd_ndb object"); + goto err; + } + set_thd_ndb(thd, thd_ndb); + thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; + thd->query_id= 0; // to keep valgrind quiet + { + static char db[]= ""; + thd->db= db; + if (ndb_binlog_running) + open_binlog_index(thd, &binlog_tables, &binlog_index); + if (!apply_status_share) + { + sql_print_error("NDB: Could not get apply status share"); + } + thd->db= db; + } + +#ifdef RUN_NDB_BINLOG_TIMER + Timer main_timer; +#endif + for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) && + ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); ) + { + +#ifdef RUN_NDB_BINLOG_TIMER + main_timer.stop(); + sql_print_information("main_timer %ld ms", main_timer.elapsed_ms()); + main_timer.start(); +#endif + + /* + now we don't want any events before next gci is complete + */ + thd->proc_info= "Waiting for event from ndbcluster"; + thd->set_time(); + + /* wait for event or 1000 ms */ + Uint64 gci= 0, schema_gci; + int res= 0, tot_poll_wait= 1000; + if (ndb_binlog_running) + { + res= ndb->pollEvents(tot_poll_wait, &gci); + tot_poll_wait= 0; + } + int schema_res= schema_ndb->pollEvents(tot_poll_wait, &schema_gci); + ndb_latest_received_binlog_epoch= gci; + + while (gci > schema_gci && schema_res >= 0) + schema_res= schema_ndb->pollEvents(10, &schema_gci); + + if ((abort_loop || do_ndbcluster_binlog_close_connection) && + (ndb_latest_handled_binlog_epoch >= g_latest_trans_gci || + !ndb_binlog_running)) + break; /* Shutting down server */ + + if (binlog_index && binlog_index->s->version < refresh_version) + { + if (binlog_index->s->version < refresh_version) + { + close_thread_tables(thd); + binlog_index= 0; + } + } + + MEM_ROOT **root_ptr= + my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + MEM_ROOT *old_root= *root_ptr; + MEM_ROOT mem_root; + init_sql_alloc(&mem_root, 4096, 0); + List<Cluster_replication_schema> post_epoch_log_list; + List<Cluster_replication_schema> post_epoch_unlock_list; + *root_ptr= &mem_root; + + if (unlikely(schema_res > 0)) + { + schema_ndb-> + setReportThreshEventGCISlip(ndb_report_thresh_binlog_epoch_slip); + schema_ndb->setReportThreshEventFreeMem(ndb_report_thresh_binlog_mem_usage); + NdbEventOperation *pOp= schema_ndb->nextEvent(); + while (pOp != NULL) + { + if (!pOp->hasError()) + ndb_binlog_thread_handle_schema_event(thd, schema_ndb, pOp, + &post_epoch_log_list, + &post_epoch_unlock_list, + &mem_root); + else + sql_print_error("NDB: error %lu (%s) on handling " + "binlog schema event", + (ulong) pOp->getNdbError().code, + pOp->getNdbError().message); + pOp= schema_ndb->nextEvent(); + } + } + + if (res > 0) + { + DBUG_PRINT("info", ("pollEvents res: %d", res)); +#ifdef RUN_NDB_BINLOG_TIMER + Timer gci_timer, write_timer; + int event_count= 0; +#endif + thd->proc_info= "Processing events"; + NdbEventOperation *pOp= ndb->nextEvent(); + Binlog_index_row row; + while (pOp != 
NULL) + { + // sometimes get TE_ALTER with invalid table + DBUG_ASSERT(pOp->getEventType() == NdbDictionary::Event::TE_ALTER || + ! IS_NDB_BLOB_PREFIX(pOp->getEvent()->getTable()->getName())); + ndb-> + setReportThreshEventGCISlip(ndb_report_thresh_binlog_epoch_slip); + ndb->setReportThreshEventFreeMem(ndb_report_thresh_binlog_mem_usage); + + assert(pOp->getGCI() <= ndb_latest_received_binlog_epoch); + bzero((char*) &row, sizeof(row)); + injector::transaction trans= inj->new_trans(thd); + { // pass table map before epoch + Uint32 iter=0; + const NdbEventOperation* gci_op; + Uint32 event_types; + while ((gci_op=ndb->getGCIEventOperations(&iter, &event_types)) + != NULL) + { + NDB_SHARE* share=(NDB_SHARE*)gci_op->getCustomData(); + DBUG_PRINT("info", ("per gci op %p share %p event types 0x%x", + gci_op, share, event_types)); + // this should not happen + if (share == NULL || share->table == NULL) + { + DBUG_PRINT("info", ("no share or table !")); + continue; + } + TABLE* table=share->table; + const LEX_STRING& name=table->s->table_name; + DBUG_PRINT("info", ("use_table: %.*s", name.length, name.str)); + injector::transaction::table tbl(table, true); + // TODO enable when mats patch pushed + //trans.use_table(::server_id, tbl); + } + } + gci= pOp->getGCI(); + if (apply_status_share) + { + TABLE *table= apply_status_share->table; + + const LEX_STRING& name=table->s->table_name; + DBUG_PRINT("info", ("use_table: %.*s", name.length, name.str)); + injector::transaction::table tbl(table, true); + // TODO enable when mats patch pushed + //trans.use_table(::server_id, tbl); + + MY_BITMAP b; + uint32 bitbuf; + DBUG_ASSERT(table->s->fields <= sizeof(bitbuf) * 8); + bitmap_init(&b, &bitbuf, table->s->fields, false); + bitmap_set_all(&b); + table->field[0]->store((longlong)::server_id); + table->field[1]->store((longlong)gci); + trans.write_row(::server_id, + injector::transaction::table(table, true), + &b, table->s->fields, + table->record[0]); + } + else + { + sql_print_error("NDB: Could not get apply status share"); + } +#ifdef RUN_NDB_BINLOG_TIMER + write_timer.start(); +#endif + do + { +#ifdef RUN_NDB_BINLOG_TIMER + event_count++; +#endif + if (pOp->hasError() && + ndb_binlog_thread_handle_error(ndb, pOp, row) < 0) + goto err; + +#ifndef DBUG_OFF + { + NDB_SHARE *share= (NDB_SHARE*) pOp->getCustomData(); + DBUG_PRINT("info", + ("EVENT TYPE:%d GCI:%lld last applied: %lld " + "share: 0x%lx", pOp->getEventType(), gci, + ndb_latest_applied_binlog_epoch, share)); + DBUG_ASSERT(share != 0); + } +#endif + if ((unsigned) pOp->getEventType() < + (unsigned) NDBEVENT::TE_FIRST_NON_DATA_EVENT) + ndb_binlog_thread_handle_data_event(ndb, pOp, row, trans); + else + { + // set injector_ndb database/schema from table internal name + int ret= ndb->setDatabaseAndSchemaName(pOp->getEvent()->getTable()); + assert(ret == 0); + ndb_binlog_thread_handle_non_data_event(ndb, pOp, row); + // reset to catch errors + ndb->setDatabaseName(""); + } + + pOp= ndb->nextEvent(); + } while (pOp && pOp->getGCI() == gci); + + /* + note! pOp is not referring to an event in the next epoch + or is == 0 + */ +#ifdef RUN_NDB_BINLOG_TIMER + write_timer.stop(); +#endif + + if (row.n_inserts || row.n_updates + || row.n_deletes || row.n_schemaops) + { + injector::transaction::binlog_pos start= trans.start_pos(); + if (int r= trans.commit()) + { + sql_print_error("NDB binlog:" + "Error during COMMIT of GCI. Error: %d", + r); + /* TODO: Further handling? 
*/ + } + row.gci= gci; + row.master_log_file= start.file_name(); + row.master_log_pos= start.file_pos(); + + DBUG_PRINT("info",("COMMIT gci %lld",gci)); + if (ndb_update_binlog_index) + ndb_add_binlog_index(thd, &row); + ndb_latest_applied_binlog_epoch= gci; + } + else + trans.commit(); + ndb_latest_handled_binlog_epoch= gci; +#ifdef RUN_NDB_BINLOG_TIMER + gci_timer.stop(); + sql_print_information("gci %ld event_count %d write time " + "%ld(%d e/s), total time %ld(%d e/s)", + (ulong)gci, event_count, + write_timer.elapsed_ms(), + event_count / write_timer.elapsed_ms(), + gci_timer.elapsed_ms(), + event_count / gci_timer.elapsed_ms()); +#endif + } + } + + /* + process any operations that should be done after + the epoch is complete + */ + { + Cluster_replication_schema *schema; + while ((schema= post_epoch_unlock_list.pop())) + { + ndbcluster_update_slock(thd, schema->db, schema->name); + } + while ((schema= post_epoch_log_list.pop())) + { + char *thd_db_save= thd->db; + thd->db= schema->db; + thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query, + schema->query_length, FALSE, + schema->name[0] == 0); + thd->db= thd_db_save; + } + } + free_root(&mem_root, MYF(0)); + *root_ptr= old_root; + ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch; + } +err: + DBUG_PRINT("info",("Shutting down cluster binlog thread")); + close_thread_tables(thd); + pthread_mutex_lock(&injector_mutex); + /* don't mess with the injector_ndb anymore from other threads */ + injector_ndb= 0; + pthread_mutex_unlock(&injector_mutex); + thd->db= 0; // as not to try to free memory + sql_print_information("Stopping Cluster Binlog"); + + if (apply_status_share) + free_share(&apply_status_share); + if (schema_share) + free_share(&schema_share); + + /* remove all event operations */ + if (ndb) + { + NdbEventOperation *op; + DBUG_PRINT("info",("removing all event operations")); + while ((op= ndb->getEventOperation())) + { + DBUG_ASSERT(! 
IS_NDB_BLOB_PREFIX(op->getEvent()->getTable()->getName())); + DBUG_PRINT("info",("removing event operation on %s", + op->getEvent()->getName())); + NDB_SHARE *share= (NDB_SHARE*) op->getCustomData(); + free_share(&share); + ndb->dropEventOperation(op); + } + delete ndb; + ndb= 0; + } + + // Placed here to avoid a memory leak; TODO: check if needed + net_end(&thd->net); + delete thd; + + ndb_binlog_thread_running= -1; + ndb_binlog_running= FALSE; + (void) pthread_cond_signal(&injector_cond); + + DBUG_PRINT("exit", ("ndb_binlog_thread")); + my_thread_end(); + + pthread_exit(0); + DBUG_RETURN(NULL); +} + +bool +ndbcluster_show_status_binlog(THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type) +{ + char buf[IO_SIZE]; + uint buflen; + ulonglong ndb_latest_epoch= 0; + DBUG_ENTER("ndbcluster_show_status_binlog"); + + pthread_mutex_lock(&injector_mutex); + if (injector_ndb) + { + ndb_latest_epoch= injector_ndb->getLatestGCI(); + pthread_mutex_unlock(&injector_mutex); + + buflen= + snprintf(buf, sizeof(buf), + "latest_epoch=%llu, " + "latest_trans_epoch=%llu, " + "latest_received_binlog_epoch=%llu, " + "latest_handled_binlog_epoch=%llu, " + "latest_applied_binlog_epoch=%llu", + ndb_latest_epoch, + g_latest_trans_gci, + ndb_latest_received_binlog_epoch, + ndb_latest_handled_binlog_epoch, + ndb_latest_applied_binlog_epoch); + if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name), + "binlog", strlen("binlog"), + buf, buflen)) + DBUG_RETURN(TRUE); + } + else + pthread_mutex_unlock(&injector_mutex); + DBUG_RETURN(FALSE); +} + +#endif /* HAVE_NDB_BINLOG */ diff --git a/sql/ha_ndbcluster_binlog.h b/sql/ha_ndbcluster_binlog.h new file mode 100644 index 00000000000..91ef53edd6b --- /dev/null +++ b/sql/ha_ndbcluster_binlog.h @@ -0,0 +1,175 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +// Typedefs for long names +typedef NdbDictionary::Object NDBOBJ; +typedef NdbDictionary::Column NDBCOL; +typedef NdbDictionary::Table NDBTAB; +typedef NdbDictionary::Index NDBINDEX; +typedef NdbDictionary::Dictionary NDBDICT; +typedef NdbDictionary::Event NDBEVENT; + +#define IS_TMP_PREFIX(A) (is_prefix(A, tmp_file_prefix) || is_prefix(A, "@0023sql")) + +extern ulong ndb_extra_logging; + +#define INJECTOR_EVENT_LEN 200 + +/* + The numbers below must not change as they + are passed between mysql servers, and if changed + would break compatibility. Add new numbers to + the end.
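+ + For example, a receiving server is presumably expected to map the integer read from the schema event straight back onto this enum (schema_type being a hypothetical name for that integer): + + SCHEMA_OP_TYPE type= (SCHEMA_OP_TYPE) schema_type; + + so the value 0 must keep meaning SOT_DROP_TABLE in every version.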
+*/ +enum SCHEMA_OP_TYPE +{ + SOT_DROP_TABLE= 0, + SOT_CREATE_TABLE= 1, + SOT_RENAME_TABLE= 2, + SOT_ALTER_TABLE= 3, + SOT_DROP_DB= 4, + SOT_CREATE_DB= 5, + SOT_ALTER_DB= 6, + SOT_CLEAR_SLOCK= 7, + SOT_TABLESPACE= 8, + SOT_LOGFILE_GROUP= 9 +}; + +const uint max_ndb_nodes= 64; /* multiple of 32 */ + +static const char *ha_ndb_ext=".ndb"; +static const char share_prefix[]= "./"; + +#ifdef HAVE_NDB_BINLOG +extern pthread_t ndb_binlog_thread; +extern pthread_mutex_t injector_mutex; +extern pthread_cond_t injector_cond; + +extern unsigned char g_node_id_map[max_ndb_nodes]; +extern handlerton ndbcluster_hton; +extern pthread_t ndb_util_thread; +extern pthread_mutex_t LOCK_ndb_util_thread; +extern pthread_cond_t COND_ndb_util_thread; +extern int ndbcluster_util_inited; +extern pthread_mutex_t ndbcluster_mutex; +extern HASH ndbcluster_open_tables; +extern Ndb_cluster_connection* g_ndb_cluster_connection; +extern long ndb_number_of_storage_nodes; + +/* + Initialize the binlog part of the ndb handlerton +*/ +void ndbcluster_binlog_init_handlerton(); +/* + Initialize the binlog part of the NDB_SHARE +*/ +void ndbcluster_binlog_init_share(NDB_SHARE *share, TABLE *table); + +int ndbcluster_create_binlog_setup(Ndb *ndb, const char *key, + uint key_len, + const char *db, + const char *table_name, + my_bool share_may_exist); +int ndbcluster_create_event(Ndb *ndb, const NDBTAB *table, + const char *event_name, NDB_SHARE *share, + int push_warning= 0); +int ndbcluster_create_event_ops(NDB_SHARE *share, + const NDBTAB *ndbtab, + const char *event_name); +int ndbcluster_log_schema_op(THD *thd, NDB_SHARE *share, + const char *query, int query_length, + const char *db, const char *table_name, + uint32 ndb_table_id, + uint32 ndb_table_version, + enum SCHEMA_OP_TYPE type, + const char *old_db= 0, + const char *old_table_name= 0); +int ndbcluster_handle_drop_table(Ndb *ndb, const char *event_name, + NDB_SHARE *share); +void ndb_rep_event_name(String *event_name, + const char *db, const char *tbl); +int ndb_create_table_from_engine(THD *thd, const char *db, + const char *table_name); +int ndbcluster_binlog_start(); +pthread_handler_t ndb_binlog_thread_func(void *arg); + +/* + table cluster_replication.apply_status +*/ +void ndbcluster_setup_binlog_table_shares(THD *thd); +extern NDB_SHARE *apply_status_share; +extern NDB_SHARE *schema_share; + +extern THD *injector_thd; +extern my_bool ndb_binlog_running; + +bool +ndbcluster_show_status_binlog(THD* thd, stat_print_fn *stat_print, + enum ha_stat_type stat_type); + +/* + prototypes for ndb handler utility function also needed by + the ndb binlog code +*/ +int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, + uint pack_length); +int ndbcluster_find_all_files(THD *thd); +#endif /* HAVE_NDB_BINLOG */ + +void ndb_unpack_record(TABLE *table, NdbValue *value, + MY_BITMAP *defined, byte *buf); + +NDB_SHARE *ndbcluster_get_share(const char *key, + TABLE *table, + bool create_if_not_exists, + bool have_lock); +NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share); +void ndbcluster_free_share(NDB_SHARE **share, bool have_lock); +void ndbcluster_real_free_share(NDB_SHARE **share); +int handle_trailing_share(NDB_SHARE *share); +inline NDB_SHARE *get_share(const char *key, + TABLE *table, + bool create_if_not_exists= TRUE, + bool have_lock= FALSE) +{ + return ndbcluster_get_share(key, table, create_if_not_exists, have_lock); +} + +inline NDB_SHARE *get_share(NDB_SHARE *share) +{ + return ndbcluster_get_share(share); +} + +inline void free_share(NDB_SHARE **share, bool have_lock= 
FALSE) +{ + ndbcluster_free_share(share, have_lock); +} + +inline void real_free_share(NDB_SHARE **share) +{ + ndbcluster_real_free_share(share); +} + +inline +Thd_ndb * +get_thd_ndb(THD *thd) { return (Thd_ndb *) thd->ha_data[ndbcluster_hton.slot]; } + +inline +void +set_thd_ndb(THD *thd, Thd_ndb *thd_ndb) { thd->ha_data[ndbcluster_hton.slot]= thd_ndb; } + +Ndb* check_ndb_in_thd(THD* thd); diff --git a/sql/ha_ndbcluster_tables.h b/sql/ha_ndbcluster_tables.h new file mode 100644 index 00000000000..12124cd8820 --- /dev/null +++ b/sql/ha_ndbcluster_tables.h @@ -0,0 +1,21 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#define NDB_REP_DB "cluster" +#define NDB_REP_TABLE "binlog_index" +#define NDB_APPLY_TABLE "apply_status" +#define NDB_SCHEMA_TABLE "schema" diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc new file mode 100644 index 00000000000..e7a324481db --- /dev/null +++ b/sql/ha_partition.cc @@ -0,0 +1,5412 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This handler was developed by Mikael Ronstrom for version 5.1 of MySQL. + It is an abstraction layer on top of other handlers such as MyISAM, + InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also + be handled by a storage engine. The current example of this is NDB + Cluster, which handles partitioning internally. This has benefits in + that many loops needed in the partition handler can be avoided. + + Partitioning has an inherent feature which in some cases is positive and + in some cases is negative. It splits the data into chunks. This makes + the data more manageable; queries can easily be parallelised towards the + parts, and indexes are split such that there are fewer levels in the + index trees. The inherent disadvantage is that to use a split index + one has to scan all index parts, which is fine for large queries but + can be a disadvantage for small queries. + + Partitioning lays the foundation for more manageable databases that are + extremely large. It also lays the foundation for more parallelism + in the execution of queries. This functionality will grow with later + versions of MySQL.
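+ + As a concrete illustration (hypothetical table), CREATE TABLE t1 + (a INT) PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (10), + PARTITION p1 VALUES LESS THAN (20)) stores the rows as two chunks, + and a query restricted to a < 10 only has to read the chunk and + index of p0.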
+ + You can enable it in your build by doing the following during the build + process: + ./configure --with-partition + + The partition handler is set up to use table locks. It implements a partition "SHARE" + that is inserted into a hash by table name. You can use this to store + state information that any partition handler object will be able to see + if it is using the same table. + + Please read the object definition in ha_partition.h before reading the rest + of this file. +*/ + +#ifdef __GNUC__ +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" + +#include "ha_partition.h" + +static const char *ha_par_ext= ".par"; +#ifdef NOT_USED +static int free_share(PARTITION_SHARE * share); +static PARTITION_SHARE *get_share(const char *table_name, TABLE * table); +#endif + +/**************************************************************************** + MODULE create/delete handler object +****************************************************************************/ + +static handler *partition_create_handler(TABLE_SHARE *share); +static uint partition_flags(); +static uint alter_table_flags(uint flags); + +handlerton partition_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, + "partition", + SHOW_OPTION_YES, + "Partition Storage Engine Helper", /* A comment used by SHOW to describe an engine */ + DB_TYPE_PARTITION_DB, + 0, /* Method that initializes a storage engine */ + 0, /* slot */ + 0, /* savepoint size */ + NULL /*ndbcluster_close_connection*/, + NULL, /* savepoint_set */ + NULL, /* savepoint_rollback */ + NULL, /* savepoint_release */ + NULL /*ndbcluster_commit*/, + NULL /*ndbcluster_rollback*/, + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL, /* rollback_by_xid */ + NULL, + NULL, + NULL, + partition_create_handler, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + partition_flags, /* Partition flags */ + alter_table_flags, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill FILES table */ + HTON_NOT_USER_SELECTABLE | HTON_HIDDEN, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ +}; + +/* + Create new partition handler + + SYNOPSIS + partition_create_handler() + table Table object + + RETURN VALUE + New partition object +*/ + +static handler *partition_create_handler(TABLE_SHARE *share) +{ + return new ha_partition(share); +} + +/* + HA_CAN_PARTITION: + Used by storage engines that can handle partitioning without this + partition handler + (Partition, NDB) + + HA_CAN_UPDATE_PARTITION_KEY: + Set if the handler can update fields that are part of the partition + function. + + HA_CAN_PARTITION_UNIQUE: + Set if the handler can handle unique indexes where the fields of the + unique key are not part of the fields of the partition function. Thus + a unique key can be set on all fields. + + HA_USE_AUTO_PARTITION: + Set if the handler sets all tables to be partitioned by default.
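+ + As an illustration, a storage engine with fully native partitioning + could in principle advertise the whole set from its own flag hook + (a hypothetical sketch, not code from this patch): + + static uint engine_partition_flags() + { + return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | + HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION); + } + + whereas the partition handler itself only returns HA_CAN_PARTITION + below.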
+*/ + +static uint partition_flags() +{ + return HA_CAN_PARTITION; +} + +static uint alter_table_flags(uint flags __attribute__((unused))) +{ + return (HA_PARTITION_FUNCTION_SUPPORTED | + HA_FAST_CHANGE_PARTITION); +} + +/* + Constructor method + + SYNOPSIS + ha_partition() + table Table object + + RETURN VALUE + NONE +*/ + +ha_partition::ha_partition(TABLE_SHARE *share) + :handler(&partition_hton, share), m_part_info(NULL), m_create_handler(FALSE), + m_is_sub_partitioned(0) +{ + DBUG_ENTER("ha_partition::ha_partition(table)"); + init_handler_variables(); + DBUG_VOID_RETURN; +} + + +/* + Constructor method + + SYNOPSIS + ha_partition() + part_info Partition info + + RETURN VALUE + NONE +*/ + +ha_partition::ha_partition(partition_info *part_info) + :handler(&partition_hton, NULL), m_part_info(part_info), + m_create_handler(TRUE), + m_is_sub_partitioned(m_part_info->is_sub_partitioned()) + +{ + DBUG_ENTER("ha_partition::ha_partition(part_info)"); + init_handler_variables(); + DBUG_ASSERT(m_part_info); + DBUG_VOID_RETURN; +} + + +/* + Initialise handler object + + SYNOPSIS + init_handler_variables() + + RETURN VALUE + NONE +*/ + +void ha_partition::init_handler_variables() +{ + active_index= MAX_KEY; + m_mode= 0; + m_open_test_lock= 0; + m_file_buffer= NULL; + m_name_buffer_ptr= NULL; + m_engine_array= NULL; + m_file= NULL; + m_reorged_file= NULL; + m_reorged_parts= 0; + m_added_file= NULL; + m_tot_parts= 0; + m_has_transactions= 0; + m_pkey_is_clustered= 0; + m_lock_type= F_UNLCK; + m_part_spec.start_part= NO_CURRENT_PART_ID; + m_scan_value= 2; + m_ref_length= 0; + m_part_spec.end_part= NO_CURRENT_PART_ID; + m_index_scan_type= partition_no_index_scan; + m_start_key.key= NULL; + m_start_key.length= 0; + m_myisam= FALSE; + m_innodb= FALSE; + m_extra_cache= FALSE; + m_extra_cache_size= 0; + m_table_flags= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + m_low_byte_first= 1; + m_part_field_array= NULL; + m_ordered_rec_buffer= NULL; + m_top_entry= NO_CURRENT_PART_ID; + m_rec_length= 0; + m_last_part= 0; + m_rec0= 0; + m_curr_key_info= 0; + /* + this allows blackhole to work properly + */ + m_no_locks= 0; + +#ifdef DONT_HAVE_TO_BE_INITALIZED + m_start_key.flag= 0; + m_ordered= TRUE; +#endif +} + + +/* + Destructor method + + SYNOPSIS + ~ha_partition() + + RETURN VALUE + NONE +*/ + +ha_partition::~ha_partition() +{ + DBUG_ENTER("ha_partition::~ha_partition()"); + if (m_file != NULL) + { + uint i; + for (i= 0; i < m_tot_parts; i++) + delete m_file[i]; + } + my_free((char*) m_ordered_rec_buffer, MYF(MY_ALLOW_ZERO_PTR)); + + clear_handler_file(); + DBUG_VOID_RETURN; +} + + +/* + Initialise partition handler object + + SYNOPSIS + ha_initialise() + + RETURN VALUE + 1 Error + 0 Success + + DESCRIPTION + + The partition handler is only a layer on top of other engines. Thus it + can't really perform anything without the underlying handlers. Thus we + add this method as part of the allocation of a handler object. + + 1) Allocation of underlying handlers + If we have access to the partition info we will allocate one handler + instance for each partition. + 2) Allocation without partition info + The cases where we don't have access to this information is when called + in preparation for delete_table and rename_table and in that case we + only need to set HA_FILE_BASED. In that case we will use the .par file + that contains information about the partitions and their engines and + the names of each partition. + 3) Table flags initialisation + We need also to set table flags for the partition handler. 
This is not + static since it depends on what storage engines are used as underlying + handlers. + The table flags are set in this routine to simulate the behaviour of a + normal storage engine. + The flag HA_FILE_BASED will be set independently of the underlying handlers. + 4) Index flags initialisation + When knowledge exists on the indexes it is also possible to initialise the + index flags. Again the index flags must be initialised by using the + underlying handlers since this is storage engine dependent. + The flag HA_READ_ORDER will be reset for the time being to indicate no + ordered output is available from partition handler indexes. Later a merge + sort will be performed using the underlying handlers. + 5) primary_key_is_clustered, has_transactions and low_byte_first are + calculated here. + +*/ + +int ha_partition::ha_initialise() +{ + handler **file_array, *file; + DBUG_ENTER("ha_partition::ha_initialise"); + + if (m_create_handler) + { + m_tot_parts= m_part_info->get_tot_partitions(); + DBUG_ASSERT(m_tot_parts > 0); + if (new_handlers_from_part_info()) + DBUG_RETURN(1); + } + else if (!table_share || !table_share->normalized_path.str) + { + /* + Called with dummy table share (delete, rename and alter table) + Don't need to set-up table flags other than + HA_FILE_BASED here + */ + m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + DBUG_RETURN(0); + } + else if (get_from_handler_file(table_share->normalized_path.str)) + { + mem_alloc_error(2); + DBUG_RETURN(1); + } + /* + We create all underlying table handlers here. We do it in this special + method to be able to report allocation errors. + + Set up table_flags, low_byte_first, primary_key_is_clustered and + has_transactions since they are called often in all kinds of places; + other parameters are calculated on demand. + HA_FILE_BASED is always set for partition handler since we use a + special file for handling names of partitions and engine types. + HA_CAN_GEOMETRY, HA_CAN_FULLTEXT, HA_CAN_SQL_HANDLER, HA_DUPP_POS, + HA_CAN_INSERT_DELAYED are disabled until further investigation. + */ + m_table_flags= m_file[0]->table_flags(); + m_low_byte_first= m_file[0]->low_byte_first(); + m_has_transactions= TRUE; + m_pkey_is_clustered= TRUE; + file_array= m_file; + do + { + file= *file_array; + if (m_low_byte_first != file->low_byte_first()) + { + // Cannot have handlers with different endianness + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + DBUG_RETURN(1); + } + if (!file->has_transactions()) + m_has_transactions= FALSE; + if (!file->primary_key_is_clustered()) + m_pkey_is_clustered= FALSE; + m_table_flags&= file->table_flags(); + } while (*(++file_array)); + m_table_flags&= ~(HA_CAN_GEOMETRY | HA_CAN_FULLTEXT | HA_DUPP_POS | + HA_CAN_SQL_HANDLER | HA_CAN_INSERT_DELAYED); + m_table_flags|= HA_FILE_BASED | HA_REC_NOT_IN_SEQ; + DBUG_RETURN(0); +} + +/**************************************************************************** + MODULE meta data changes +****************************************************************************/ +/* + Create partition names + + SYNOPSIS + create_partition_name() + out:out Created partition name string + in1 First part + in2 Second part + name_variant Normal, temporary or renamed partition name + + RETURN VALUE + NONE + + DESCRIPTION + This method is used to calculate the partition name, service routine to + the del_ren_cre_table method.
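+ + For example, for a hypothetical table at path "./test/t1" with a + partition p0, + + create_partition_name(buff, "./test/t1", "p0", NORMAL_PART_NAME, TRUE); + + is expected to leave "./test/t1#P#p0" in buff; the TEMP_PART_NAME and + RENAMED_PART_NAME variants append "#TMP#" and "#REN#" respectively.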
+*/ + +#define NORMAL_PART_NAME 0 +#define TEMP_PART_NAME 1 +#define RENAMED_PART_NAME 2 +static void create_partition_name(char *out, const char *in1, + const char *in2, uint name_variant, + bool translate) +{ + char transl_part_name[FN_REFLEN]; + const char *transl_part; + + if (translate) + { + tablename_to_filename(in2, transl_part_name, FN_REFLEN); + transl_part= transl_part_name; + } + else + transl_part= in2; + if (name_variant == NORMAL_PART_NAME) + strxmov(out, in1, "#P#", transl_part, NullS); + else if (name_variant == TEMP_PART_NAME) + strxmov(out, in1, "#P#", transl_part, "#TMP#", NullS); + else if (name_variant == RENAMED_PART_NAME) + strxmov(out, in1, "#P#", transl_part, "#REN#", NullS); +} + +/* + Create subpartition name + + SYNOPSIS + create_subpartition_name() + out:out Created partition name string + in1 First part + in2 Second part + in3 Third part + name_variant Normal, temporary or renamed partition name + + RETURN VALUE + NONE + + DESCRIPTION + This method is used to calculate the subpartition name, service routine to + the del_ren_cre_table method. +*/ + +static void create_subpartition_name(char *out, const char *in1, + const char *in2, const char *in3, + uint name_variant) +{ + char transl_part_name[FN_REFLEN], transl_subpart_name[FN_REFLEN]; + + tablename_to_filename(in2, transl_part_name, FN_REFLEN); + tablename_to_filename(in3, transl_subpart_name, FN_REFLEN); + if (name_variant == NORMAL_PART_NAME) + strxmov(out, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, NullS); + else if (name_variant == TEMP_PART_NAME) + strxmov(out, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, "#TMP#", NullS); + else if (name_variant == RENAMED_PART_NAME) + strxmov(out, in1, "#P#", transl_part_name, + "#SP#", transl_subpart_name, "#REN#", NullS); +} + + +/* + Delete a table + + SYNOPSIS + delete_table() + name Full path of table name + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + Used to delete a table. By the time delete_table() has been called all + opened references to this table will have been closed (and your globally + shared references released). The variable name will just be the name of + the table. You will need to remove any files you have created at this + point. + + If you do not implement this, the default delete_table() is called from + handler.cc and it will delete all files with the file extensions returned + by bas_ext(). + + Called from handler.cc by delete_table and ha_create_table(). Only used + during create if the table_flag HA_DROP_BEFORE_CREATE was specified for + the storage engine. +*/ + +int ha_partition::delete_table(const char *name) +{ + int error; + DBUG_ENTER("ha_partition::delete_table"); + + if ((error= del_ren_cre_table(name, NULL, NULL, NULL))) + DBUG_RETURN(error); + DBUG_RETURN(handler::delete_table(name)); +} + + +/* + Rename a table + + SYNOPSIS + rename_table() + from Full path of old table name + to Full path of new table name + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + Renames a table from one name to another from an alter table call. + + If you do not implement this, the default rename_table() is called from + handler.cc and it will rename all files with the file extensions returned + by bas_ext(). + + Called from sql_table.cc by mysql_rename_table().
+*/ + +int ha_partition::rename_table(const char *from, const char *to) +{ + int error; + DBUG_ENTER("ha_partition::rename_table"); + + if ((error= del_ren_cre_table(from, to, NULL, NULL))) + DBUG_RETURN(error); + DBUG_RETURN(handler::rename_table(from, to)); +} + + +/* + Create the handler file (.par-file) + + SYNOPSIS + create_handler_files() + name Full path of table name + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + create_handler_files is called to create any handler specific files + before opening the file with openfrm to later call ::create on the + file object. + In the partition handler this is used to store the names of partitions + and types of engines in the partitions. +*/ + +int ha_partition::create_handler_files(const char *name) +{ + DBUG_ENTER("ha_partition::create_handler_files()"); + + /* + We need to update total number of parts since we might write the handler + file as part of a partition management command + */ + if (create_handler_file(name)) + { + my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0)); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Create a partitioned table + + SYNOPSIS + create() + name Full path of table name + table_arg Table object + create_info Create info generated for CREATE TABLE + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + create() is called to create a table. The variable name will have the name + of the table. When create() is called you do not need to worry about + opening the table. Also, the FRM file will have already been created so + adjusting create_info will not do you any good. You can overwrite the frm + file at this point if you wish to change the table definition, but there + are no methods currently provided for doing that. + + Called from handler.cc by ha_create_table(). +*/ + +int ha_partition::create(const char *name, TABLE *table_arg, + HA_CREATE_INFO *create_info) +{ + char t_name[FN_REFLEN]; + DBUG_ENTER("ha_partition::create"); + + strmov(t_name, name); + DBUG_ASSERT(*fn_rext((char*)name) == '\0'); + if (del_ren_cre_table(t_name, NULL, table_arg, create_info)) + { + handler::delete_table(t_name); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Drop partitions as part of ALTER TABLE of partitions + + SYNOPSIS + drop_partitions() + path Complete path of db and table name + + RETURN VALUE + >0 Failure + 0 Success + + DESCRIPTION + Use part_info object on handler object to deduce which partitions to + drop (each partition has a state attached to it) +*/ + +int ha_partition::drop_partitions(const char *path) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + List_iterator<partition_element> temp_it(m_part_info->temp_partitions); + char part_name_buff[FN_REFLEN]; + uint no_parts= m_part_info->partitions.elements; + uint part_count= 0; + uint no_subparts= m_part_info->no_subparts; + uint i= 0; + uint name_variant; + int error= 1; + bool reorged_parts= (m_reorged_parts > 0); + bool temp_partitions= (m_part_info->temp_partitions.elements > 0); + DBUG_ENTER("ha_partition::drop_partitions"); + + if (temp_partitions) + no_parts= m_part_info->temp_partitions.elements; + do + { + partition_element *part_elem; + if (temp_partitions) + { + /* + We need to remove the reorganised partitions that were put in the + temp_partitions-list. 
+ */ + part_elem= temp_it++; + DBUG_ASSERT(part_elem->part_state == PART_TO_BE_DROPPED); + } + else + part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_DROPPED || + part_elem->part_state == PART_IS_CHANGED) + { + handler *file; + /* + This part is to be dropped, meaning the part or all its subparts. + */ + name_variant= NORMAL_PART_NAME; + if (part_elem->part_state == PART_IS_CHANGED || + (part_elem->part_state == PART_TO_BE_DROPPED && temp_partitions)) + name_variant= RENAMED_PART_NAME; + if (m_is_sub_partitioned) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + part= i * no_subparts + j; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, name_variant); + if (reorged_parts) + file= m_reorged_file[part_count++]; + else + file= m_file[part]; + DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff)); + error= file->delete_table((const char *) part_name_buff); + } while (++j < no_subparts); + } + else + { + create_partition_name(part_name_buff, path, + part_elem->partition_name, name_variant, + TRUE); + if (reorged_parts) + file= m_reorged_file[part_count++]; + else + file= m_file[i]; + DBUG_PRINT("info", ("Drop partition %s", part_name_buff)); + error= file->delete_table((const char *) part_name_buff); + } + if (part_elem->part_state == PART_IS_CHANGED) + part_elem->part_state= PART_NORMAL; + else + part_elem->part_state= PART_IS_DROPPED; + } + } while (++i < no_parts); + DBUG_RETURN(error); +} + + +/* + Rename partitions as part of ALTER TABLE of partitions + + SYNOPSIS + rename_partitions() + path Complete path of db and table name + + RETURN VALUE + TRUE Failure + FALSE Success + + DESCRIPTION + When reorganising partitions, adding hash partitions and coalescing + partitions it can be necessary to rename partitions while holding + an exclusive lock on the table. 
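+ For example (hypothetical names), when p0 of ./test/t1 is reorganised + the files are moved in two steps: + + ./test/t1#P#p0 -> ./test/t1#P#p0#REN# (old partition) + ./test/t1#P#p0#TMP# -> ./test/t1#P#p0 (new partition) +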
+ Which partitions to rename is given by state of partitions found by the + partition info struct referenced from the handler object +*/ + +int ha_partition::rename_partitions(const char *path) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + List_iterator<partition_element> temp_it(m_part_info->temp_partitions); + char part_name_buff[FN_REFLEN]; + char norm_name_buff[FN_REFLEN]; + uint no_parts= m_part_info->partitions.elements; + uint part_count= 0; + uint no_subparts= m_part_info->no_subparts; + uint i= 0; + uint j= 0; + int error= 1; + uint temp_partitions= m_part_info->temp_partitions.elements; + handler *file; + partition_element *part_elem, *sub_elem; + DBUG_ENTER("ha_partition::rename_partitions"); + + if (temp_partitions) + { + do + { + part_elem= temp_it++; + if (m_is_sub_partitioned) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + do + { + sub_elem= sub_it++; + file= m_reorged_file[part_count++]; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + RENAMED_PART_NAME); + create_subpartition_name(norm_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + NORMAL_PART_NAME); + DBUG_PRINT("info", ("Rename subpartition from %s to %s", + norm_name_buff, part_name_buff)); + error= file->rename_table((const char *) norm_name_buff, + (const char *) part_name_buff); + } while (++j < no_subparts); + } + else + { + file= m_reorged_file[part_count++]; + create_partition_name(part_name_buff, path, + part_elem->partition_name, RENAMED_PART_NAME, + TRUE); + create_partition_name(norm_name_buff, path, + part_elem->partition_name, NORMAL_PART_NAME, + TRUE); + DBUG_PRINT("info", ("Rename partition from %s to %s", + norm_name_buff, part_name_buff)); + error= file->rename_table((const char *) norm_name_buff, + (const char *) part_name_buff); + } + } while (++i < temp_partitions); + } + i= 0; + do + { + part_elem= part_it++; + if (part_elem->part_state == PART_IS_CHANGED || + (part_elem->part_state == PART_IS_ADDED && temp_partitions)) + { + if (m_is_sub_partitioned) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint part; + + j= 0; + do + { + sub_elem= sub_it++; + part= i * no_subparts + j; + create_subpartition_name(norm_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + NORMAL_PART_NAME); + if (part_elem->part_state == PART_IS_CHANGED) + { + file= m_reorged_file[part_count++]; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + RENAMED_PART_NAME); + DBUG_PRINT("info", ("Rename subpartition from %s to %s", + norm_name_buff, part_name_buff)); + error= file->rename_table((const char *) norm_name_buff, + (const char *) part_name_buff); + } + file= m_new_file[part]; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + TEMP_PART_NAME); + DBUG_PRINT("info", ("Rename subpartition from %s to %s", + part_name_buff, norm_name_buff)); + error= file->rename_table((const char *) part_name_buff, + (const char *) norm_name_buff); + } while (++j < no_subparts); + } + else + { + create_partition_name(norm_name_buff, path, + part_elem->partition_name, NORMAL_PART_NAME, + TRUE); + if (part_elem->part_state == PART_IS_CHANGED) + { + file= m_reorged_file[part_count++]; + create_partition_name(part_name_buff, path, + part_elem->partition_name, RENAMED_PART_NAME, + TRUE); + DBUG_PRINT("info", ("Rename partition from %s to %s", + 
norm_name_buff, part_name_buff)); + error= file->rename_table((const char *) norm_name_buff, + (const char *) part_name_buff); + } + file= m_new_file[i]; + create_partition_name(part_name_buff, path, + part_elem->partition_name, TEMP_PART_NAME, + TRUE); + DBUG_PRINT("info", ("Rename partition from %s to %s", + part_name_buff, norm_name_buff)); + error= file->rename_table((const char *) part_name_buff, + (const char *) norm_name_buff); + } + } + } while (++i < no_parts); + DBUG_RETURN(error); +} + + +#define OPTIMIZE_PARTS 1 +#define ANALYZE_PARTS 2 +#define CHECK_PARTS 3 +#define REPAIR_PARTS 4 + +/* + Optimize table + + SYNOPSIS + optimize() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::optimize"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + OPTIMIZE_PARTS, TRUE)); +} + + +/* + Analyze table + + SYNOPSIS + analyze() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::analyze"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + ANALYZE_PARTS, TRUE)); +} + + +/* + Check table + + SYNOPSIS + check() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::check"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + CHECK_PARTS, TRUE)); +} + + +/* + Repair table + + SYNOPSIS + repair() + thd Thread object + check_opt Check/analyze/repair/optimize options + + RETURN VALUES + >0 Error + 0 Success +*/ + +int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt) +{ + DBUG_ENTER("ha_partition::repair"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + REPAIR_PARTS, TRUE)); +} + +/* + Optimize partitions + + SYNOPSIS + optimize_partitions() + thd Thread object + RETURN VALUE + >0 Failure + 0 Success + DESCRIPTION + Call optimize on each partition marked with partition state PART_CHANGED +*/ + +int ha_partition::optimize_partitions(THD *thd) +{ + DBUG_ENTER("ha_partition::optimize_partitions"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + OPTIMIZE_PARTS, FALSE)); +} + +/* + Analyze partitions + + SYNOPSIS + analyze_partitions() + thd Thread object + RETURN VALUE + >0 Failure + 0 Success + DESCRIPTION + Call analyze on each partition marked with partition state PART_CHANGED +*/ + +int ha_partition::analyze_partitions(THD *thd) +{ + DBUG_ENTER("ha_partition::analyze_partitions"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + ANALYZE_PARTS, FALSE)); +} + +/* + Check partitions + + SYNOPSIS + check_partitions() + thd Thread object + RETURN VALUE + >0 Failure + 0 Success + DESCRIPTION + Call check on each partition marked with partition state PART_CHANGED +*/ + +int ha_partition::check_partitions(THD *thd) +{ + DBUG_ENTER("ha_partition::check_partitions"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + CHECK_PARTS, FALSE)); +} + +/* + Repair partitions + + SYNOPSIS + repair_partitions() + thd Thread object + RETURN VALUE + >0 Failure + 0 Success + DESCRIPTION + Call repair on each partition marked with partition state PART_CHANGED +*/ + +int ha_partition::repair_partitions(THD *thd) +{ + 
DBUG_ENTER("ha_partition::repair_partitions"); + + DBUG_RETURN(handle_opt_partitions(thd, &thd->lex->check_opt, + REPAIR_PARTS, FALSE)); +} + + +/* + Handle optimize/analyze/check/repair of one partition + + SYNOPSIS + handle_opt_part() + thd Thread object + check_opt Options + file Handler object of partition + flag Optimize/Analyze/Check/Repair flag + + RETURN VALUE + >0 Failure + 0 Success +*/ + +static int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, + handler *file, uint flag) +{ + int error; + DBUG_ENTER("handle_opt_part"); + DBUG_PRINT("enter", ("flag = %u", flag)); + + if (flag == OPTIMIZE_PARTS) + error= file->optimize(thd, check_opt); + else if (flag == ANALYZE_PARTS) + error= file->analyze(thd, check_opt); + else if (flag == CHECK_PARTS) + error= file->ha_check(thd, check_opt); + else if (flag == REPAIR_PARTS) + error= file->ha_repair(thd, check_opt); + else + { + DBUG_ASSERT(FALSE); + error= 1; + } + if (error == HA_ADMIN_ALREADY_DONE) + error= 0; + DBUG_RETURN(error); +} + + +/* + Handle optimize/analyze/check/repair of partitions + + SYNOPSIS + handle_opt_partitions() + thd Thread object + check_opt Options + flag Optimize/Analyze/Check/Repair flag + all_parts All partitions or only a subset + + RETURN VALUE + >0 Failure + 0 Success +*/ + +int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, + uint flag, bool all_parts) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + uint no_parts= m_part_info->no_parts; + uint no_subparts= m_part_info->no_subparts; + uint i= 0; + LEX *lex= thd->lex; + int error; + DBUG_ENTER("ha_partition::handle_opt_partitions"); + DBUG_PRINT("enter", ("all_parts %u, flag= %u", all_parts, flag)); + + do + { + partition_element *part_elem= part_it++; + if (all_parts || part_elem->part_state == PART_CHANGED) + { + handler *file; + if (m_is_sub_partitioned) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + part= i * no_subparts + j; + DBUG_PRINT("info", ("Optimize subpartition %u", + part)); + if ((error= handle_opt_part(thd, check_opt, m_file[part], flag))) + { + my_error(ER_GET_ERRNO, MYF(0), error); + DBUG_RETURN(TRUE); + } + } while (++j < no_subparts); + } + else + { + DBUG_PRINT("info", ("Optimize partition %u", i)); + if ((error= handle_opt_part(thd, check_opt, m_file[i], flag))) + { + my_error(ER_GET_ERRNO, MYF(0), error); + DBUG_RETURN(TRUE); + } + } + } + } while (++i < no_parts); + DBUG_RETURN(FALSE); +} + +/* + Prepare by creating a new partition + + SYNOPSIS + prepare_new_partition() + table Table object + create_info Create info from CREATE TABLE + file Handler object of new partition + part_name partition name + + RETURN VALUE + >0 Error + 0 Success +*/ + +int ha_partition::prepare_new_partition(TABLE *table, + HA_CREATE_INFO *create_info, + handler *file, const char *part_name) +{ + int error; + bool create_flag= FALSE; + bool open_flag= FALSE; + DBUG_ENTER("prepare_new_partition"); + + if ((error= file->create(part_name, table, create_info))) + goto error; + create_flag= TRUE; + if ((error= file->ha_open(table, part_name, m_mode, m_open_test_lock))) + goto error; + if ((error= file->external_lock(current_thd, m_lock_type))) + goto error; + + DBUG_RETURN(0); +error: + if (create_flag) + VOID(file->delete_table(part_name)); + print_error(error, MYF(0)); + DBUG_RETURN(error); +} + + +/* + Cleanup by removing all created partitions after error + + SYNOPSIS + cleanup_new_partition() + part_count Number 
of partitions to remove + + RETURN VALUE + NONE + + DESCRIPTION + TODO: + We must ensure that in the case that we get an error during the process + that we call external_lock with F_UNLCK, close the table and delete the + table in the case where we have been successful with prepare_handler. + We solve this by keeping an array of successful calls to prepare_handler + which can then be used to undo the call. +*/ + +void ha_partition::cleanup_new_partition(uint part_count) +{ + handler **save_m_file= m_file; + DBUG_ENTER("ha_partition::cleanup_new_partition"); + + if (m_added_file && m_added_file[0]) + { + m_file= m_added_file; + m_added_file= NULL; + + external_lock(current_thd, F_UNLCK); + /* delete_table also needed, a bit more complex */ + close(); + + m_added_file= m_file; + m_file= save_m_file; + } + DBUG_VOID_RETURN; +} + +/* + Implement the partition changes defined by ALTER TABLE of partitions + + SYNOPSIS + change_partitions() + create_info HA_CREATE_INFO object describing all + fields and indexes in table + path Complete path of db and table name + out: copied Output parameter where number of copied + records are added + out: deleted Output parameter where number of deleted + records are added + pack_frm_data Reference to packed frm file + pack_frm_len Length of packed frm file + + RETURN VALUE + >0 Failure + 0 Success + + DESCRIPTION + Add and copy if needed a number of partitions, during this operation + no other operation is ongoing in the server. This is used by + ADD PARTITION all types as well as by REORGANIZE PARTITION. For + one-phased implementations it is used also by DROP and COALESCE + PARTITIONs. + One-phased implementation needs the new frm file, other handlers will + get zero length and a NULL reference here. +*/ + +int ha_partition::change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong *copied, + ulonglong *deleted, + const void *pack_frm_data + __attribute__((unused)), + uint pack_frm_len + __attribute__((unused))) +{ + List_iterator<partition_element> part_it(m_part_info->partitions); + List_iterator <partition_element> t_it(m_part_info->temp_partitions); + char part_name_buff[FN_REFLEN]; + uint no_parts= m_part_info->partitions.elements; + uint no_subparts= m_part_info->no_subparts; + uint i= 0; + uint no_remain_partitions, part_count; + handler **new_file_array; + int error= 1; + bool first; + bool copy_parts= FALSE; + uint temp_partitions= m_part_info->temp_partitions.elements; + THD *thd= current_thd; + DBUG_ENTER("ha_partition::change_partitions"); + + m_reorged_parts= 0; + if (!m_part_info->is_sub_partitioned()) + no_subparts= 1; + + /* + Step 1: + Calculate number of reorganised partitions and allocate space for + their handler references. + */ + if (temp_partitions) + { + m_reorged_parts= temp_partitions * no_subparts; + } + else + { + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_REORGED_DROPPED) + { + m_reorged_parts+= no_subparts; + } + } while (++i < no_parts); + } + if (m_reorged_parts && + !(m_reorged_file= (handler**)sql_calloc(sizeof(partition_element*)* + (m_reorged_parts + 1)))) + { + mem_alloc_error(sizeof(partition_element*)*(m_reorged_parts+1)); + DBUG_RETURN(TRUE); + } + + /* + Step 2: + Calculate number of partitions after change and allocate space for + their handler references. 
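+ + Worked example (assumed figures): a REORGANIZE PARTITION that turns + 2 of 4 partitions into 3 new ones on a table with 2 subpartitions + per partition gives m_reorged_parts= 2 * 2 = 4 in step 1, leaves + 2 + 3 = 5 elements in the partition list, and thus yields + no_remain_partitions= 5 * 2 = 10 here.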
+ */ + no_remain_partitions= 0; + if (temp_partitions) + { + no_remain_partitions= no_parts * no_subparts; + } + else + { + part_it.rewind(); + i= 0; + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_NORMAL || + part_elem->part_state == PART_TO_BE_ADDED || + part_elem->part_state == PART_CHANGED) + { + no_remain_partitions+= no_subparts; + } + } while (++i < no_parts); + } + if (!(new_file_array= (handler**)sql_calloc(sizeof(handler*)* + (2*(no_remain_partitions + 1))))) + { + mem_alloc_error(sizeof(handler*)*2*(no_remain_partitions+1)); + DBUG_RETURN(TRUE); + } + m_added_file= &new_file_array[no_remain_partitions + 1]; + + /* + Step 3: + Fill m_reorged_file with handler references and NULL at the end + */ + if (m_reorged_parts) + { + i= 0; + part_count= 0; + first= TRUE; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_REORGED_DROPPED) + { + memcpy((void*)&m_reorged_file[part_count], + (void*)&m_file[i*no_subparts], + sizeof(handler*)*no_subparts); + part_count+= no_subparts; + } + else if (first && temp_partitions && + part_elem->part_state == PART_TO_BE_ADDED) + { + /* + When doing an ALTER TABLE REORGANIZE PARTITION a number of + partitions are to be reorganised into a set of new partitions. + The reorganised partitions are in this case in the temp_partitions + list. We copy all of them in one batch and thus we only do this + until we find the first partition with state PART_TO_BE_ADDED + since this is where the new partitions go in and where the old + ones used to be. + */ + first= FALSE; + memcpy((void*)m_reorged_file, &m_file[i*no_subparts], + sizeof(handler*)*m_reorged_parts*no_subparts); + } + } while (++i < no_parts); + } + + /* + Step 4: + Fill new_file_array with handler references. Create the handlers if + needed. + */ + i= 0; + part_count= 0; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_NORMAL) + { + memcpy((void*)&new_file_array[part_count], (void*)&m_file[i], + sizeof(handler*)*no_subparts); + part_count+= no_subparts; + } + else if (part_elem->part_state == PART_CHANGED || + part_elem->part_state == PART_TO_BE_ADDED) + { + uint j= 0; + do + { + if (!(new_file_array[part_count++]= get_new_handler(table->s, + thd->mem_root, + part_elem->engine_type))) + { + mem_alloc_error(sizeof(handler)); + DBUG_RETURN(TRUE); + } + } while (++j < no_subparts); + } + } while (++i < no_parts); + + /* + Step 5: + Create the new partitions and also open, lock and call external_lock + on them to prepare them for the copy phase and also for later close + calls. + */ + i= 0; + part_count= 0; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_ADDED || + part_elem->part_state == PART_CHANGED) + { + /* + A new partition needs to be created; PART_TO_BE_ADDED means an + entirely new partition and PART_CHANGED means a changed partition + that will still exist with either more or less data in it.
+ */ + uint name_variant= NORMAL_PART_NAME; + if (part_elem->part_state == PART_CHANGED || + (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions)) + name_variant= TEMP_PART_NAME; + if (m_part_info->is_sub_partitioned()) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + uint j= 0, part; + do + { + partition_element *sub_elem= sub_it++; + create_subpartition_name(part_name_buff, path, + part_elem->partition_name, + sub_elem->partition_name, + name_variant); + part= i * no_subparts + j; + DBUG_PRINT("info", ("Add subpartition %s", part_name_buff)); + if ((error= prepare_new_partition(table, create_info, + new_file_array[part], + (const char *)part_name_buff))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(TRUE); + } + m_added_file[part_count++]= new_file_array[part]; + } while (++j < no_subparts); + } + else + { + create_partition_name(part_name_buff, path, + part_elem->partition_name, name_variant, + TRUE); + DBUG_PRINT("info", ("Add partition %s", part_name_buff)); + if ((error= prepare_new_partition(table, create_info, + new_file_array[i], + (const char *)part_name_buff))) + { + cleanup_new_partition(part_count); + DBUG_RETURN(TRUE); + } + m_added_file[part_count++]= new_file_array[i]; + } + } + } while (++i < no_parts); + + /* + Step 6: + State update to prepare for next write of the frm file. + */ + i= 0; + part_it.rewind(); + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_TO_BE_ADDED) + part_elem->part_state= PART_IS_ADDED; + else if (part_elem->part_state == PART_CHANGED) + part_elem->part_state= PART_IS_CHANGED; + else if (part_elem->part_state == PART_REORGED_DROPPED) + part_elem->part_state= PART_TO_BE_DROPPED; + } while (++i < no_parts); + for (i= 0; i < temp_partitions; i++) + { + partition_element *part_elem= t_it++; + DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED); + part_elem->part_state= PART_TO_BE_DROPPED; + } + m_new_file= new_file_array; + DBUG_RETURN(copy_partitions(copied, deleted)); +} + + +/* + Copy partitions as part of ALTER TABLE of partitions + + SYNOPSIS + copy_partitions() + out:copied Number of records copied + out:deleted Number of records deleted + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + change_partitions has done all the preparations, now it is time to + actually copy the data from the reorganised partitions to the new + partitions. +*/ + +int ha_partition::copy_partitions(ulonglong *copied, ulonglong *deleted) +{ + uint reorg_part= 0; + int result= 0; + longlong func_value; + DBUG_ENTER("ha_partition::copy_partitions"); + + while (reorg_part < m_reorged_parts) + { + handler *file= m_reorged_file[reorg_part]; + uint32 new_part; + + late_extra_cache(reorg_part); + if ((result= file->ha_rnd_init(1))) + goto error; + while (TRUE) + { + if ((result= file->rnd_next(m_rec0))) + { + if (result == HA_ERR_RECORD_DELETED) + continue; //Probably MyISAM + if (result != HA_ERR_END_OF_FILE) + goto error; + /* + End-of-file reached, break out to continue with next partition or + end the copy process. + */ + break; + } + /* Found record to insert into new handler */ + if (m_part_info->get_partition_id(m_part_info, &new_part, + &func_value)) + { + /* + This record is in the original table but will not be in the new + table since it doesn't fit into any partition any longer due to + changed partitioning ranges or list values. 
+        */
+        (*deleted)++;
+      }
+      else
+      {
+        /* Copy record to new handler */
+        (*copied)++;
+        if ((result= m_new_file[new_part]->write_row(m_rec0)))
+          goto error;
+      }
+    }
+    late_extra_no_cache(reorg_part);
+    file->rnd_end();
+    reorg_part++;
+  }
+  DBUG_RETURN(FALSE);
+error:
+  print_error(result, MYF(0));
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Update create info as part of ALTER TABLE
+
+  SYNOPSIS
+    update_create_info()
+    create_info            Create info from ALTER TABLE
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    Method is empty so far
+*/
+
+void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
+{
+  return;
+}
+
+
+/*
+  Change comments specific to handler
+
+  SYNOPSIS
+    update_table_comment()
+    comment            Original comment
+
+  RETURN VALUE
+    new comment
+
+  DESCRIPTION
+    No comment changes so far
+*/
+
+char *ha_partition::update_table_comment(const char *comment)
+{
+  return (char*) comment;            /* Nothing to change */
+}
+
+
+
+/*
+  Handle delete, rename and create table
+
+  SYNOPSIS
+    del_ren_cre_table()
+    from               Full path of old table
+    to                 Full path of new table
+    table_arg          Table object
+    create_info        Create info
+
+  RETURN VALUE
+    >0                 Error
+    0                  Success
+
+  DESCRIPTION
+    Common routine to handle delete_table and rename_table.
+    The routine uses the partition handler file to get the
+    names of the partition instances. Both these routines
+    are called after creating the handler without table
+    object and thus the file is needed to discover the
+    names of the partitions and the underlying storage engines.
+*/
+
+uint ha_partition::del_ren_cre_table(const char *from,
+                                     const char *to,
+                                     TABLE *table_arg,
+                                     HA_CREATE_INFO *create_info)
+{
+  int save_error= 0;
+  int error;
+  char from_buff[FN_REFLEN], to_buff[FN_REFLEN];
+  char *name_buffer_ptr;
+  uint i;
+  handler **file;
+  DBUG_ENTER("del_ren_cre_table()");
+
+  if (get_from_handler_file(from))
+    DBUG_RETURN(TRUE);
+  DBUG_ASSERT(m_file_buffer);
+  name_buffer_ptr= m_name_buffer_ptr;
+  file= m_file;
+  i= 0;
+  do
+  {
+    create_partition_name(from_buff, from, name_buffer_ptr, NORMAL_PART_NAME,
+                          FALSE);
+    if (to != NULL)
+    {                                           // Rename branch
+      create_partition_name(to_buff, to, name_buffer_ptr, NORMAL_PART_NAME,
+                            FALSE);
+      error= (*file)->rename_table((const char*) from_buff,
+                                   (const char*) to_buff);
+    }
+    else if (table_arg == NULL)                 // delete branch
+      error= (*file)->delete_table((const char*) from_buff);
+    else
+    {
+      set_up_table_before_create(table_arg, from_buff, create_info, i);
+      error= (*file)->create(from_buff, table_arg, create_info);
+    }
+    name_buffer_ptr= strend(name_buffer_ptr) + 1;
+    if (error)
+      save_error= error;
+    i++;
+  } while (*(++file));
+  DBUG_RETURN(save_error);
+}
+
+/*
+  Find partition based on partition id
+
+  SYNOPSIS
+    find_partition_element()
+    part_id            Partition id of partition looked for
+
+  RETURN VALUE
+    >0                 Reference to partition_element
+    0                  Partition not found
+*/
+
+partition_element *ha_partition::find_partition_element(uint part_id)
+{
+  uint i;
+  uint curr_part_id= 0;
+  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
+
+  for (i= 0; i < m_part_info->no_parts; i++)
+  {
+    partition_element *part_elem;
+    part_elem= part_it++;
+    if (m_is_sub_partitioned)
+    {
+      uint j;
+      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
+      for (j= 0; j < m_part_info->no_subparts; j++)
+      {
+        part_elem= sub_it++;
+        if (part_id == curr_part_id++)
+          return part_elem;
+      }
+    }
+    else if (part_id == curr_part_id++)
+      return part_elem;
+  }
+  DBUG_ASSERT(0);
+  current_thd->fatal_error();                   // Abort
+  return NULL;
+}
+
+
+/*
+  Set up table share object before calling create on underlying handler
+
+  SYNOPSIS
+    set_up_table_before_create()
+    table                       Table object
+    partition_name_with_path    Full path and name of the partition
+    info                        Create info
+    part_id                     Partition id of partition to set-up
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    Set up
+    1) Comment on partition
+    2) MAX_ROWS, MIN_ROWS on partition
+    3) Index file name on partition
+    4) Data file name on partition
+*/
+
+void ha_partition::set_up_table_before_create(TABLE *table,
+                    const char *partition_name_with_path,
+                    HA_CREATE_INFO *info,
+                    uint part_id)
+{
+  partition_element *part_elem= find_partition_element(part_id);
+
+  if (!part_elem)
+    return;                                     // Fatal error
+  table->s->max_rows= part_elem->part_max_rows;
+  table->s->min_rows= part_elem->part_min_rows;
+  const char *partition_name= strrchr(partition_name_with_path, FN_LIBCHAR);
+  if (part_elem->index_file_name)
+    append_file_to_dir(current_thd,
+                       (const char**)&part_elem->index_file_name,
+                       partition_name+1);
+  if (part_elem->data_file_name)
+    append_file_to_dir(current_thd,
+                       (const char**)&part_elem->data_file_name,
+                       partition_name+1);
+  info->index_file_name= part_elem->index_file_name;
+  info->data_file_name= part_elem->data_file_name;
+}
+
+
+/*
+  Add two names together
+
+  SYNOPSIS
+    name_add()
+    out:dest           Destination string
+    first_name         First name
+    sec_name           Second name
+
+  RETURN VALUE
+    Number of bytes added to dest, including the NUL terminator
+
+  DESCRIPTION
+    Routine used to add two names with '#SP#' in between them. Service
+    routine to create_handler_file.
+    Include the NUL terminator in the count of characters since it is
+    needed as a separator between the partition names.
+*/
+
+static uint name_add(char *dest, const char *first_name, const char *sec_name)
+{
+  return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
+}
+
+
+/*
+  Create the special .par file
+
+  SYNOPSIS
+    create_handler_file()
+    name               Full path of table name
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    Method used to create handler file with names of partitions, their
+    engine types and the number of partitions.
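+
+    As an illustration only (not part of this patch), the word layout
+    and XOR checksum described in the File format comment below can be
+    sketched in plain C++; int4store_le is a stand-in for the server's
+    little-endian int4store helper:
+
+      #include <cstring>
+      #include <vector>
+      typedef unsigned char uchar;
+      typedef unsigned int uint32;
+
+      static void int4store_le(uchar *p, uint32 v)
+      {
+        p[0]= (uchar) v;         p[1]= (uchar) (v >> 8);
+        p[2]= (uchar) (v >> 16); p[3]= (uchar) (v >> 24);
+      }
+
+      // Build a zero-filled, word-aligned buffer and stamp the XOR
+      // checksum into word 1; a reader XORing all words then gets 0
+      // on success.
+      std::vector<uchar> make_par_image(uint32 tot_parts,
+                                        const std::vector<uchar> &engines,
+                                        const std::vector<char> &names)
+      {
+        uint32 part_words= (tot_parts + 3) / 4;
+        uint32 name_words= ((uint32) names.size() + 3) / 4;
+        uint32 len_words= 4 + part_words + name_words;
+        std::vector<uchar> buf(4 * len_words, 0);
+        int4store_le(&buf[0], len_words);
+        int4store_le(&buf[8], tot_parts);
+        std::memcpy(&buf[12], engines.data(), engines.size());
+        int4store_le(&buf[12 + 4 * part_words], (uint32) names.size());
+        std::memcpy(&buf[16 + 4 * part_words], names.data(), names.size());
+        uint32 chksum= 0;
+        for (uint32 i= 0; i < len_words; i++)
+          chksum^= (uint32) buf[4*i] | buf[4*i+1] << 8 |
+                   buf[4*i+2] << 16 | (uint32) buf[4*i+3] << 24;
+        int4store_le(&buf[4], chksum);          // word 1 was 0 so far
+        return buf;
+      }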
+*/ + +bool ha_partition::create_handler_file(const char *name) +{ + partition_element *part_elem, *subpart_elem; + uint i, j, part_name_len, subpart_name_len; + uint tot_partition_words, tot_name_len, no_parts; + uint tot_parts= 0; + uint tot_len_words, tot_len_byte, chksum, tot_name_words; + char *name_buffer_ptr; + uchar *file_buffer, *engine_array; + bool result= TRUE; + char file_name[FN_REFLEN]; + char part_name[FN_REFLEN]; + char subpart_name[FN_REFLEN]; + File file; + List_iterator_fast <partition_element> part_it(m_part_info->partitions); + DBUG_ENTER("create_handler_file"); + + no_parts= m_part_info->partitions.elements; + DBUG_PRINT("info", ("table name = %s, no_parts = %u", name, + no_parts)); + tot_name_len= 0; + for (i= 0; i < no_parts; i++) + { + part_elem= part_it++; + if (part_elem->part_state != PART_NORMAL && + part_elem->part_state != PART_IS_ADDED && + part_elem->part_state != PART_IS_CHANGED) + continue; + tablename_to_filename(part_elem->partition_name, part_name, + FN_REFLEN); + part_name_len= strlen(part_name); + if (!m_is_sub_partitioned) + { + tot_name_len+= part_name_len + 1; + tot_parts++; + } + else + { + List_iterator_fast <partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + tablename_to_filename(subpart_elem->partition_name, + subpart_name, + FN_REFLEN); + subpart_name_len= strlen(subpart_name); + tot_name_len+= part_name_len + subpart_name_len + 5; + tot_parts++; + } + } + } + /* + File format: + Length in words 4 byte + Checksum 4 byte + Total number of partitions 4 byte + Array of engine types n * 4 bytes where + n = (m_tot_parts + 3)/4 + Length of name part in bytes 4 bytes + Name part m * 4 bytes where + m = ((length_name_part + 3)/4)*4 + + All padding bytes are zeroed + */ + tot_partition_words= (tot_parts + 3) / 4; + tot_name_words= (tot_name_len + 3) / 4; + tot_len_words= 4 + tot_partition_words + tot_name_words; + tot_len_byte= 4 * tot_len_words; + if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL)))) + DBUG_RETURN(TRUE); + engine_array= (file_buffer + 12); + name_buffer_ptr= (char*) (file_buffer + ((4 + tot_partition_words) * 4)); + part_it.rewind(); + for (i= 0; i < no_parts; i++) + { + part_elem= part_it++; + if (part_elem->part_state != PART_NORMAL && + part_elem->part_state != PART_IS_ADDED && + part_elem->part_state != PART_IS_CHANGED) + continue; + if (!m_is_sub_partitioned) + { + tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN); + name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1; + *engine_array= (uchar) ha_legacy_type(part_elem->engine_type); + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + else + { + List_iterator_fast <partition_element> sub_it(part_elem->subpartitions); + for (j= 0; j < m_part_info->no_subparts; j++) + { + subpart_elem= sub_it++; + tablename_to_filename(part_elem->partition_name, part_name, + FN_REFLEN); + tablename_to_filename(subpart_elem->partition_name, subpart_name, + FN_REFLEN); + name_buffer_ptr+= name_add(name_buffer_ptr, + part_name, + subpart_name); + *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type); + DBUG_PRINT("info", ("engine: %u", *engine_array)); + engine_array++; + } + } + } + chksum= 0; + int4store(file_buffer, tot_len_words); + int4store(file_buffer + 8, tot_parts); + int4store(file_buffer + 12 + (tot_partition_words * 4), tot_name_len); + for (i= 0; i < tot_len_words; i++) + chksum^= uint4korr(file_buffer + 4 * i); + 
int4store(file_buffer + 4, chksum);
+  /*
+    Remove the .frm extension and replace it with .par.
+    Create, write and close the file, to be used at open, delete_table
+    and rename_table.
+  */
+  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
+  if ((file= my_create(file_name, CREATE_MODE, O_RDWR | O_TRUNC,
+                       MYF(MY_WME))) >= 0)
+  {
+    result= my_write(file, (byte *) file_buffer, tot_len_byte,
+                     MYF(MY_WME | MY_NABP));
+    VOID(my_close(file, MYF(0)));
+  }
+  else
+    result= TRUE;
+  my_free((char*) file_buffer, MYF(0));
+  DBUG_RETURN(result);
+}
+
+/*
+  Clear handler variables and free some memory
+
+  SYNOPSIS
+    clear_handler_file()
+
+  RETURN VALUE
+    NONE
+*/
+
+void ha_partition::clear_handler_file()
+{
+  my_free((char*) m_file_buffer, MYF(MY_ALLOW_ZERO_PTR));
+  my_free((char*) m_engine_array, MYF(MY_ALLOW_ZERO_PTR));
+  m_file_buffer= NULL;
+  m_name_buffer_ptr= NULL;
+  m_engine_array= NULL;
+}
+
+/*
+  Create underlying handler objects
+
+  SYNOPSIS
+    create_handlers()
+
+  RETURN VALUE
+    TRUE               Error
+    FALSE              Success
+*/
+
+bool ha_partition::create_handlers()
+{
+  uint i;
+  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+  DBUG_ENTER("create_handlers");
+
+  if (!(m_file= (handler **) sql_alloc(alloc_len)))
+    DBUG_RETURN(TRUE);
+  bzero(m_file, alloc_len);
+  for (i= 0; i < m_tot_parts; i++)
+  {
+    if (!(m_file[i]= get_new_handler(table_share, current_thd->mem_root,
+                                     m_engine_array[i])))
+      DBUG_RETURN(TRUE);
+    DBUG_PRINT("info", ("engine_type: %u",
+                        (uint) ha_legacy_type(m_engine_array[i])));
+  }
+  m_file[m_tot_parts]= 0;
+  /* For the moment we only support partitions over the same table engine */
+  if (m_engine_array[0] == &myisam_hton)
+  {
+    DBUG_PRINT("info", ("MyISAM"));
+    m_myisam= TRUE;
+  }
+  /* InnoDB may not be compiled in... */
+  else if (ha_legacy_type(m_engine_array[0]) == DB_TYPE_INNODB)
+  {
+    DBUG_PRINT("info", ("InnoDB"));
+    m_innodb= TRUE;
+  }
+  DBUG_RETURN(FALSE);
+}
+
+/*
+  Create underlying handler objects from partition info
+
+  SYNOPSIS
+    new_handlers_from_part_info()
+
+  RETURN VALUE
+    TRUE               Error
+    FALSE              Success
+*/
+
+bool ha_partition::new_handlers_from_part_info()
+{
+  uint i, j, part_count;
+  partition_element *part_elem;
+  uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
+  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
+  THD *thd= current_thd;
+  DBUG_ENTER("ha_partition::new_handlers_from_part_info");
+
+  if (!(m_file= (handler **) sql_alloc(alloc_len)))
+  {
+    mem_alloc_error(alloc_len);
+    goto error_end;
+  }
+  bzero(m_file, alloc_len);
+  DBUG_ASSERT(m_part_info->no_parts > 0);
+
+  i= 0;
+  part_count= 0;
+  /*
+    We don't know the size of the underlying storage engine's handler
+    object, so we use sizeof(handler) as the number of bytes reported
+    in the error message if allocation fails.
+  */
+  do
+  {
+    part_elem= part_it++;
+    if (m_is_sub_partitioned)
+    {
+      for (j= 0; j < m_part_info->no_subparts; j++)
+      {
+        if (!(m_file[part_count++]= get_new_handler(table_share, thd->mem_root,
+                                                    part_elem->engine_type)))
+          goto error;
+        DBUG_PRINT("info", ("engine_type: %u",
+                   (uint) ha_legacy_type(part_elem->engine_type)));
+      }
+    }
+    else
+    {
+      if (!(m_file[part_count++]= get_new_handler(table_share, thd->mem_root,
+                                                  part_elem->engine_type)))
+        goto error;
+      DBUG_PRINT("info", ("engine_type: %u",
+                 (uint) ha_legacy_type(part_elem->engine_type)));
+    }
+  } while (++i < m_part_info->no_parts);
+  if (part_elem->engine_type == &myisam_hton)
+  {
+    DBUG_PRINT("info", ("MyISAM"));
+    m_myisam= TRUE;
+  }
+  DBUG_RETURN(FALSE);
+error:
+  mem_alloc_error(sizeof(handler));
+error_end:
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Get info about partition engines and
their names from the .par file + + SYNOPSIS + get_from_handler_file() + name Full path of table name + + RETURN VALUE + TRUE Error + FALSE Success + + DESCRIPTION + Open handler file to get partition names, engine types and number of + partitions. +*/ + +bool ha_partition::get_from_handler_file(const char *name) +{ + char buff[FN_REFLEN], *address_tot_name_len; + File file; + char *file_buffer, *name_buffer_ptr; + handlerton **engine_array; + uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum; + DBUG_ENTER("ha_partition::get_from_handler_file"); + DBUG_PRINT("enter", ("table name: '%s'", name)); + + if (m_file_buffer) + DBUG_RETURN(FALSE); + fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT); + + /* Following could be done with my_stat to read in whole file */ + if ((file= my_open(buff, O_RDONLY | O_SHARE, MYF(0))) < 0) + DBUG_RETURN(TRUE); + if (my_read(file, (byte *) & buff[0], 8, MYF(MY_NABP))) + goto err1; + len_words= uint4korr(buff); + len_bytes= 4 * len_words; + if (!(file_buffer= my_malloc(len_bytes, MYF(0)))) + goto err1; + VOID(my_seek(file, 0, MY_SEEK_SET, MYF(0))); + if (my_read(file, (byte *) file_buffer, len_bytes, MYF(MY_NABP))) + goto err2; + + chksum= 0; + for (i= 0; i < len_words; i++) + chksum ^= uint4korr((file_buffer) + 4 * i); + if (chksum) + goto err2; + m_tot_parts= uint4korr((file_buffer) + 8); + DBUG_PRINT("info", ("No of parts = %u", m_tot_parts)); + tot_partition_words= (m_tot_parts + 3) / 4; + if (!(engine_array= (handlerton **) my_malloc(m_tot_parts * sizeof(handlerton*),MYF(0)))) + goto err2; + for (i= 0; i < m_tot_parts; i++) + engine_array[i]= ha_resolve_by_legacy_type(current_thd, + (enum legacy_db_type) *(uchar *) ((file_buffer) + 12 + i)); + address_tot_name_len= file_buffer + 12 + 4 * tot_partition_words; + tot_name_words= (uint4korr(address_tot_name_len) + 3) / 4; + if (len_words != (tot_partition_words + tot_name_words + 4)) + goto err2; + name_buffer_ptr= file_buffer + 16 + 4 * tot_partition_words; + VOID(my_close(file, MYF(0))); + m_file_buffer= file_buffer; // Will be freed in clear_handler_file() + m_name_buffer_ptr= name_buffer_ptr; + m_engine_array= engine_array; + if (!m_file && create_handlers()) + { + clear_handler_file(); + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); + +err2: + my_free(file_buffer, MYF(0)); +err1: + VOID(my_close(file, MYF(0))); + DBUG_RETURN(TRUE); +} + + +/**************************************************************************** + MODULE open/close object +****************************************************************************/ +/* + Open handler object + + SYNOPSIS + open() + name Full path of table name + mode Open mode flags + test_if_locked ? + + RETURN VALUE + >0 Error + 0 Success + + DESCRIPTION + Used for opening tables. The name will be the name of the file. + A table is opened when it needs to be opened. For instance + when a request comes in for a select on the table (tables are not + open and closed for each request, they are cached). + + Called from handler.cc by handler::ha_open(). The server opens all tables + by calling ha_open() which then calls the handler specific open(). 
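+
+    For illustration only (not part of this patch), the per-partition
+    bookkeeping open() performs below can be sketched in plain C++;
+    make_part_name is a stand-in for create_partition_name, and the
+    "#P#" separator is assumed to be the naming convention used by the
+    partition name helpers:
+
+      #include <algorithm>
+      #include <cstddef>
+      #include <string>
+      #include <vector>
+
+      static std::string make_part_name(const std::string &path,
+                                        const std::string &part)
+      {
+        return path + "#P#" + part;             // e.g. "./test/t1#P#p0"
+      }
+
+      // ref_length must fit the largest child ref plus 2 bytes in
+      // front for the partition id (PARTITION_BYTES_IN_POS).
+      std::size_t total_ref_length(const std::vector<std::size_t> &child_refs)
+      {
+        std::size_t max_ref= 0;
+        for (std::size_t r : child_refs)
+          max_ref= std::max(max_ref, r);
+        return max_ref + 2;
+      }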
+*/ + +int ha_partition::open(const char *name, int mode, uint test_if_locked) +{ + char *name_buffer_ptr= m_name_buffer_ptr; + int error; + uint alloc_len; + handler **file; + char name_buff[FN_REFLEN]; + DBUG_ENTER("ha_partition::open"); + + ref_length= 0; + m_mode= mode; + m_open_test_lock= test_if_locked; + m_part_field_array= m_part_info->full_part_field_array; + if (get_from_handler_file(name)) + DBUG_RETURN(1); + m_start_key.length= 0; + m_rec0= table->record[0]; + m_rec_length= table->s->reclength; + alloc_len= m_tot_parts * (m_rec_length + PARTITION_BYTES_IN_POS); + alloc_len+= table->s->max_key_length; + if (!m_ordered_rec_buffer) + { + if (!(m_ordered_rec_buffer= (byte*)my_malloc(alloc_len, MYF(MY_WME)))) + { + DBUG_RETURN(1); + } + { + /* + We set-up one record per partition and each record has 2 bytes in + front where the partition id is written. This is used by ordered + index_read. + We also set-up a reference to the first record for temporary use in + setting up the scan. + */ + char *ptr= (char*)m_ordered_rec_buffer; + uint i= 0; + do + { + int2store(ptr, i); + ptr+= m_rec_length + PARTITION_BYTES_IN_POS; + } while (++i < m_tot_parts); + m_start_key.key= (const byte*)ptr; + } + } + + /* Initialise the bitmap we use to determine what partitions are used */ + if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE)) + DBUG_RETURN(1); + bitmap_set_all(&(m_part_info->used_partitions)); + + file= m_file; + do + { + create_partition_name(name_buff, name, name_buffer_ptr, NORMAL_PART_NAME, + FALSE); + if ((error= (*file)->ha_open(table, (const char*) name_buff, mode, + test_if_locked))) + goto err_handler; + m_no_locks+= (*file)->lock_count(); + name_buffer_ptr+= strlen(name_buffer_ptr) + 1; + set_if_bigger(ref_length, ((*file)->ref_length)); + } while (*(++file)); + + /* + Add 2 bytes for partition id in position ref length. + ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS + */ + ref_length+= PARTITION_BYTES_IN_POS; + m_ref_length= ref_length; + /* + Release buffer read from .par file. It will not be reused again after + being opened once. + */ + clear_handler_file(); + /* + Initialise priority queue, initialised to reading forward. + */ + if ((error= init_queue(&m_queue, m_tot_parts, (uint) PARTITION_BYTES_IN_POS, + 0, key_rec_cmp, (void*)this))) + goto err_handler; + + /* + Some handlers update statistics as part of the open call. This will in + some cases corrupt the statistics of the partition handler and thus + to ensure we have correct statistics we call info from open after + calling open on all individual handlers. + */ + info(HA_STATUS_VARIABLE | HA_STATUS_CONST); + DBUG_RETURN(0); + +err_handler: + while (file-- != m_file) + (*file)->close(); +err: + DBUG_RETURN(error); +} + + +/* + Close handler object + + SYNOPSIS + close() + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + Called from sql_base.cc, sql_select.cc, and table.cc. + In sql_select.cc it is only used to close up temporary tables or during + the process where a temporary table is converted over to being a + myisam table. + For sql_base.cc look at close_data_tables(). 
+*/
+
+int ha_partition::close(void)
+{
+  bool first= TRUE;
+  handler **file;
+  DBUG_ENTER("ha_partition::close");
+
+  delete_queue(&m_queue);
+  bitmap_free(&(m_part_info->used_partitions));
+  file= m_file;
+
+repeat:
+  do
+  {
+    (*file)->close();
+  } while (*(++file));
+
+  if (first && m_added_file && m_added_file[0])
+  {
+    file= m_added_file;
+    first= FALSE;
+    goto repeat;
+  }
+
+  DBUG_RETURN(0);
+}
+
+/****************************************************************************
+                MODULE start/end statement
+****************************************************************************/
+/*
+  A number of methods to define various constants for the handler. In
+  the case of the partition handler we need to use some max and min
+  of the underlying handlers in most cases.
+*/
+
+/*
+  Set external locks on table
+
+  SYNOPSIS
+    external_lock()
+    thd                Thread object
+    lock_type          Type of external lock
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    First you should go read the section "locking functions for mysql" in
+    lock.cc to understand this.
+    This creates a lock on the table. If you are implementing a storage
+    engine that can handle transactions, look at ha_berkeley.cc to see how
+    you will want to go about doing this. Otherwise you should consider
+    calling flock() here.
+    Originally this method was used to set locks on file level to enable
+    several MySQL Servers to work on the same data. For transactional
+    engines it has been "abused" to also mean start and end of statements
+    to enable proper rollback of statements and transactions. When LOCK
+    TABLES has been issued the start_stmt method takes over the role of
+    indicating start of statement but in this case there is no end of
+    statement indicator(?).
+
+    Called from lock.cc by lock_external() and unlock_external(). Also called
+    from sql_table.cc by copy_data_between_tables().
+*/
+
+int ha_partition::external_lock(THD *thd, int lock_type)
+{
+  bool first= TRUE;
+  uint error;
+  handler **file;
+  DBUG_ENTER("ha_partition::external_lock");
+
+  file= m_file;
+  m_lock_type= lock_type;
+
+repeat:
+  do
+  {
+    DBUG_PRINT("info", ("external_lock(thd, %d) iteration %d",
+                        lock_type, (file - m_file)));
+    if ((error= (*file)->external_lock(thd, lock_type)))
+    {
+      if (F_UNLCK != lock_type)
+        goto err_handler;
+    }
+  } while (*(++file));
+
+  if (first && m_added_file && m_added_file[0])
+  {
+    DBUG_ASSERT(lock_type == F_UNLCK);
+    file= m_added_file;
+    first= FALSE;
+    goto repeat;
+  }
+  DBUG_RETURN(0);
+
+err_handler:
+  while (file-- != m_file)
+  {
+    (*file)->external_lock(thd, F_UNLCK);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Get the lock(s) for the table and perform conversion of locks if needed
+
+  SYNOPSIS
+    store_lock()
+    thd                Thread object
+    to                 Lock object array
+    lock_type          Table lock type
+
+  RETURN VALUE
+    Pointer to the first element after the last lock object stored
+
+  DESCRIPTION
+    The idea with handler::store_lock() is the following:
+
+    The statement decides which locks we should need for the table;
+    for updates/deletes/inserts we get WRITE locks, for SELECT... we get
+    read locks.
+
+    Before adding the lock into the table lock handler (see thr_lock.c)
+    mysqld calls store lock with the requested locks. Store lock can now
+    modify a write lock to a read lock (or some other lock), ignore the
+    lock (if we don't want to use MySQL table locks at all) or add locks
+    for many tables (like we do when we are using a MERGE handler).
+
+    Berkeley DB, for example, changes all WRITE locks to
+    TL_WRITE_ALLOW_WRITE (which signals that we are doing WRITES, but
+    we are still allowing other readers and writers).
+
+    When releasing locks, store_lock() is also called. In this case one
+    usually doesn't have to do anything.
+
+    store_lock is called when holding a global mutex to ensure that only
+    one thread at a time changes the locking information of tables.
+
+    In some exceptional cases MySQL may send a request for a TL_IGNORE;
+    This means that we are requesting the same lock as last time and this
+    should also be ignored. (This may happen when someone does a flush
+    table when we have opened a part of the tables, in which case mysqld
+    closes and reopens the tables and tries to get the same locks as last
+    time). In the future we will probably try to remove this.
+
+    Called from lock.cc by get_lock_data().
+*/
+
+THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
+                                         THR_LOCK_DATA **to,
+                                         enum thr_lock_type lock_type)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::store_lock");
+  file= m_file;
+  do
+  {
+    DBUG_PRINT("info", ("store lock %d iteration", (file - m_file)));
+    to= (*file)->store_lock(thd, to, lock_type);
+  } while (*(++file));
+  DBUG_RETURN(to);
+}
+
+/*
+  Start a statement when table is locked
+
+  SYNOPSIS
+    start_stmt()
+    thd                Thread object
+    lock_type          Type of external lock
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    This method is called instead of external lock when the table is locked
+    before the statement is executed.
+*/
+
+int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::start_stmt");
+
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->start_stmt(thd, lock_type)))
+      break;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Get number of lock objects returned in store_lock
+
+  SYNOPSIS
+    lock_count()
+
+  RETURN VALUE
+    Number of locks returned in call to store_lock
+
+  DESCRIPTION
+    Returns the number of locks needed in a call to store_lock.
+    We return the number of partitions since we call store_lock on each
+    underlying handler. Assists the above functions in allocating
+    sufficient space for lock structures.
+*/
+
+uint ha_partition::lock_count() const
+{
+  DBUG_ENTER("ha_partition::lock_count");
+  DBUG_PRINT("info", ("m_no_locks %d", m_no_locks));
+  DBUG_RETURN(m_no_locks);
+}
+
+
+/*
+  Unlock last accessed row
+
+  SYNOPSIS
+    unlock_row()
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    Record currently processed was not in the result set of the statement
+    and is thus unlocked. Used for UPDATE and DELETE queries.
+*/
+
+void ha_partition::unlock_row()
+{
+  m_file[m_last_part]->unlock_row();
+  return;
+}
+
+
+/****************************************************************************
+                MODULE change record
+****************************************************************************/
+
+/*
+  Insert a row to the table
+
+  SYNOPSIS
+    write_row()
+    buf                The row in MySQL Row Format
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    write_row() inserts a row. buf is a byte array of data, normally
+    record[0].
+
+    You can use the field information to extract the data from the native byte
+    array type.
+
+    Example of this would be:
+    for (Field **field=table->field ; *field ; field++)
+    {
+      ...
+    }
+
+    See ha_tina.cc for a variant of extracting all of the data as strings.
+    ha_berkeley.cc has a variant of how to store it intact by "packing" it
+    for ha_berkeley's own native storage type.
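+
+    As an illustrative sketch only (not the server code), the routing
+    that write_row() performs below boils down to: compute a partition
+    id from the row, then delegate to that partition's handler. Here a
+    hypothetical part_of callback stands in for
+    m_part_info->get_partition_id:
+
+      #include <cstddef>
+      #include <functional>
+      #include <vector>
+
+      struct RowWriter
+      {
+        virtual int write_row(const unsigned char *row)= 0;
+      };
+
+      // part_of() is the partition function; the modulo guards the
+      // sketch against out-of-range ids.
+      int route_write(std::vector<RowWriter*> &parts,
+                      const unsigned char *row,
+                      std::function<unsigned(const unsigned char*)> part_of)
+      {
+        std::size_t part_id= part_of(row) % parts.size();
+        return parts[part_id]->write_row(row);  // delegate to one child
+      }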
+
+    See the note for update_row() on auto_increments and timestamps. This
+    case also applies to write_row().
+
+    Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
+    sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
+
+    ADDITIONAL INFO:
+
+    Most handlers set the timestamp when calling write_row if any such
+    fields exist. Since we are calling an underlying handler we assume
+    the underlying handler will assume this responsibility.
+
+    Underlying handlers will also call update_auto_increment to calculate
+    the new auto increment value. We will catch the call to
+    get_auto_increment and ensure this increment value is maintained by
+    only one of the underlying handlers.
+*/
+
+int ha_partition::write_row(byte * buf)
+{
+  uint32 part_id;
+  int error;
+  longlong func_value;
+#ifdef NOT_NEEDED
+  byte *rec0= m_rec0;
+#endif
+  DBUG_ENTER("ha_partition::write_row");
+  DBUG_ASSERT(buf == m_rec0);
+
+#ifdef NOT_NEEDED
+  if (likely(buf == rec0))
+#endif
+    error= m_part_info->get_partition_id(m_part_info, &part_id,
+                                         &func_value);
+#ifdef NOT_NEEDED
+  else
+  {
+    set_field_ptr(m_part_field_array, buf, rec0);
+    error= m_part_info->get_partition_id(m_part_info, &part_id,
+                                         &func_value);
+    set_field_ptr(m_part_field_array, rec0, buf);
+  }
+#endif
+  if (unlikely(error))
+    DBUG_RETURN(error);
+  m_last_part= part_id;
+  DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  DBUG_RETURN(m_file[part_id]->write_row(buf));
+}
+
+
+/*
+  Update an existing row
+
+  SYNOPSIS
+    update_row()
+    old_data           Old record in MySQL Row Format
+    new_data           New record in MySQL Row Format
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    Yes, update_row() does what you expect, it updates a row. old_data will
+    have the previous row record in it, while new_data will have the newest
+    data in it.
+    Keep in mind that the server can do updates based on ordering if an
+    ORDER BY clause was used. Consecutive ordering is not guaranteed.
+
+    Currently new_data will not have an updated auto_increment record, or
+    an updated timestamp field. You can do this for the partition handler
+    as follows:
+    if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+      table->timestamp_field->set_time();
+    if (table->next_number_field && record == table->record[0])
+      update_auto_increment();
+
+    Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
+    new_data is always record[0]
+    old_data is normally record[1] but may be anything
+*/
+
+int ha_partition::update_row(const byte *old_data, byte *new_data)
+{
+  uint32 new_part_id, old_part_id;
+  int error;
+  longlong func_value;
+  DBUG_ENTER("ha_partition::update_row");
+
+  if ((error= get_parts_for_update(old_data, new_data, table->record[0],
+                                   m_part_info, &old_part_id, &new_part_id,
+                                   &func_value)))
+  {
+    DBUG_RETURN(error);
+  }
+
+  /*
+    TODO:
+      set_internal_auto_increment=
+        max(set_internal_auto_increment, new_data->auto_increment)
+  */
+  m_last_part= new_part_id;
+  if (new_part_id == old_part_id)
+  {
+    DBUG_PRINT("info", ("Update in partition %d", new_part_id));
+    DBUG_RETURN(m_file[new_part_id]->update_row(old_data, new_data));
+  }
+  else
+  {
+    DBUG_PRINT("info", ("Update from partition %d to partition %d",
+                        old_part_id, new_part_id));
+    if ((error= m_file[new_part_id]->write_row(new_data)))
+      DBUG_RETURN(error);
+    if ((error= m_file[old_part_id]->delete_row(old_data)))
+    {
+#ifdef IN_THE_FUTURE
+      (void) m_file[new_part_id]->delete_last_inserted_row(new_data);
+#endif
+      DBUG_RETURN(error);
+    }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Remove an existing row
+
+  SYNOPSIS
+    delete_row
+    buf                Deleted row in MySQL Row Format
+
+  RETURN VALUE
+    >0                 Error Code
+    0                  Success
+
+  DESCRIPTION
+    This will delete a row. buf will contain a copy of the row to be deleted.
+    The server will call this right after the current row has been read
+    (from either a previous rnd_xxx() or index_xxx() call).
+    If you keep a pointer to the last row or can access a primary key it will
+    make doing the deletion quite a bit easier.
+    Keep in mind that the server does not guarantee consecutive deletions;
+    ORDER BY clauses can be used.
+
+    Called in sql_acl.cc and sql_udf.cc to manage internal table information.
+    Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
+    it is used for removing duplicates while in insert it is used for REPLACE
+    calls.
+
+    buf is either record[0] or record[1]
+*/
+
+int ha_partition::delete_row(const byte *buf)
+{
+  uint32 part_id;
+  int error;
+  DBUG_ENTER("ha_partition::delete_row");
+
+  if ((error= get_part_for_delete(buf, m_rec0, m_part_info, &part_id)))
+  {
+    DBUG_RETURN(error);
+  }
+  m_last_part= part_id;
+  DBUG_RETURN(m_file[part_id]->delete_row(buf));
+}
+
+
+/*
+  Delete all rows in a table
+
+  SYNOPSIS
+    delete_all_rows()
+
+  RETURN VALUE
+    >0                 Error Code
+    0                  Success
+
+  DESCRIPTION
+    Used to delete all rows in a table. Both for cases of truncate and
+    for cases where the optimizer realizes that all rows will be
+    removed as a result of a SQL statement.
+
+    Called from item_sum.cc by Item_func_group_concat::clear(),
+    Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
+    Called from sql_delete.cc by mysql_delete().
+    Called from sql_select.cc by JOIN::reinit().
+    Called from sql_union.cc by st_select_lex_unit::exec().
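+
+    For illustration only (not part of this patch): delete_all_rows,
+    like many methods in this file, simply fans the call out over the
+    NULL-terminated array of underlying handlers and stops at the first
+    error. In isolation the pattern looks like this:
+
+      // h is a NULL-terminated array of children; op is the per-child
+      // member call; returns 0 on success or the first non-zero error.
+      template <typename Handler>
+      int fan_out(Handler **h, int (Handler::*op)())
+      {
+        for (; *h; h++)
+        {
+          int error= ((*h)->*op)();
+          if (error)
+            return error;
+        }
+        return 0;
+      }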
+*/
+
+int ha_partition::delete_all_rows()
+{
+  int error;
+  handler **file;
+  DBUG_ENTER("ha_partition::delete_all_rows");
+
+  file= m_file;
+  do
+  {
+    if ((error= (*file)->delete_all_rows()))
+      DBUG_RETURN(error);
+  } while (*(++file));
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Start a large batch of insert rows
+
+  SYNOPSIS
+    start_bulk_insert()
+    rows               Number of rows to insert
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    rows == 0 means we will probably insert many rows
+*/
+
+void ha_partition::start_bulk_insert(ha_rows rows)
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::start_bulk_insert");
+
+  if (!rows)
+  {
+    /* Avoid allocating big caches in all underlying handlers */
+    DBUG_VOID_RETURN;
+  }
+  rows= rows/m_tot_parts + 1;
+  file= m_file;
+  do
+  {
+    (*file)->start_bulk_insert(rows);
+  } while (*(++file));
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Finish a large batch of insert rows
+
+  SYNOPSIS
+    end_bulk_insert()
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+*/
+
+int ha_partition::end_bulk_insert()
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::end_bulk_insert");
+
+  file= m_file;
+  do
+  {
+    int tmp;
+    if ((tmp= (*file)->end_bulk_insert()))
+      error= tmp;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/****************************************************************************
+                MODULE full table scan
+****************************************************************************/
+/*
+  Initialize engine for random reads
+
+  SYNOPSIS
+    ha_partition::rnd_init()
+    scan        0  Initialize for random reads through rnd_pos()
+                1  Initialize for random scan through rnd_next()
+
+  RETURN VALUE
+    >0          Error code
+    0           Success
+
+  DESCRIPTION
+    rnd_init() is called when the server wants the storage engine to do a
+    table scan or when the server wants to access data through rnd_pos.
+
+    When scan is used we will scan one handler partition at a time.
+    When preparing for rnd_pos we will initialise all handler partitions.
+    No extra cache handling is needed when scanning is not performed.
+
+    Before initialising we will call rnd_end to ensure that we clean up from
+    any previous incarnation of a table scan.
+    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+    sql_table.cc, and sql_update.cc.
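+
+    A minimal sketch of the scan strategy (illustration only, not the
+    server code): a full scan walks the children serially, opening the
+    next child cursor only when the previous one is exhausted:
+
+      #include <cstddef>
+      #include <vector>
+
+      struct Cursor
+      {
+        virtual int  init()= 0;
+        virtual int  next(unsigned char *row)= 0;   // -1 on EOF
+        virtual void end()= 0;
+      };
+
+      // Assumes parts[cur] was init()ed by the caller. Returns 0 and
+      // fills row, or -1 once every partition is done.
+      int serial_next(std::vector<Cursor*> &parts, std::size_t &cur,
+                      unsigned char *row)
+      {
+        while (cur < parts.size())
+        {
+          if (parts[cur]->next(row) == 0)
+            return 0;                               // row found
+          parts[cur]->end();                        // EOF: close child
+          if (++cur < parts.size())
+            parts[cur]->init();                     // open next child
+        }
+        return -1;                                  // all partitions done
+      }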
+*/
+
+int ha_partition::rnd_init(bool scan)
+{
+  int error;
+  uint i= 0;
+  uint32 part_id;
+  handler **file;
+  DBUG_ENTER("ha_partition::rnd_init");
+
+  include_partition_fields_in_used_fields();
+
+  /* Now we see what the index of our first important partition is */
+  DBUG_PRINT("info", ("m_part_info->used_partitions 0x%x",
+                      m_part_info->used_partitions.bitmap));
+  part_id= bitmap_get_first_set(&(m_part_info->used_partitions));
+  DBUG_PRINT("info", ("m_part_spec.start_part %d", part_id));
+
+  if (MY_BIT_NONE == part_id)
+  {
+    error= 0;
+    goto err1;
+  }
+
+  /*
+    We have a partition and we are scanning with rnd_next
+    so we bump our cache
+  */
+  DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+  if (scan)
+  {
+    /*
+      rnd_end() is needed for partitioning to reset internal data if scan
+      is already in use
+    */
+    rnd_end();
+    late_extra_cache(part_id);
+    if ((error= m_file[part_id]->ha_rnd_init(scan)))
+      goto err;
+  }
+  else
+  {
+    for (i= part_id; i < m_tot_parts; i++)
+    {
+      if (bitmap_is_set(&(m_part_info->used_partitions), i))
+      {
+        if ((error= m_file[i]->ha_rnd_init(scan)))
+          goto err;
+      }
+    }
+  }
+  m_scan_value= scan;
+  m_part_spec.start_part= part_id;
+  m_part_spec.end_part= m_tot_parts - 1;
+  DBUG_PRINT("info", ("m_scan_value=%d", m_scan_value));
+  DBUG_RETURN(0);
+
+err:
+  while ((int)--i >= (int)part_id)
+  {
+    if (bitmap_is_set(&(m_part_info->used_partitions), i))
+      m_file[i]->ha_rnd_end();
+  }
+err1:
+  m_scan_value= 2;
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  DBUG_RETURN(error);
+}
+
+
+/*
+  End of a table scan
+
+  SYNOPSIS
+    rnd_end()
+
+  RETURN VALUE
+    >0          Error code
+    0           Success
+*/
+
+int ha_partition::rnd_end()
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::rnd_end");
+  switch (m_scan_value) {
+  case 2:                                       // Error
+    break;
+  case 1:
+    if (NO_CURRENT_PART_ID != m_part_spec.start_part)  // Table scan
+    {
+      late_extra_no_cache(m_part_spec.start_part);
+      m_file[m_part_spec.start_part]->ha_rnd_end();
+    }
+    break;
+  case 0:
+    file= m_file;
+    do
+    {
+      if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
+        (*file)->ha_rnd_end();
+    } while (*(++file));
+    break;
+  }
+  m_scan_value= 2;
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  DBUG_RETURN(0);
+}
+
+/*
+  Read next row during full table scan (scan in random row order)
+
+  SYNOPSIS
+    rnd_next()
+    buf         buffer that should be filled with data
+
+  RETURN VALUE
+    >0          Error code
+    0           Success
+
+  DESCRIPTION
+    This is called for each row of the table scan. When you run out of records
+    you should return HA_ERR_END_OF_FILE.
+    The Field structure for the table is the key to getting data into buf
+    in a manner that will allow the server to understand it.
+
+    Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
+    sql_table.cc, and sql_update.cc.
+*/
+
+int ha_partition::rnd_next(byte *buf)
+{
+  handler *file;
+  int result= HA_ERR_END_OF_FILE;
+  uint part_id= m_part_spec.start_part;
+  DBUG_ENTER("ha_partition::rnd_next");
+
+  if (NO_CURRENT_PART_ID == part_id)
+  {
+    /*
+      The original set of partitions to scan was empty and thus we report
+      the result here.
+    */
+    goto end;
+  }
+
+  DBUG_ASSERT(m_scan_value == 1);
+  file= m_file[part_id];
+
+  while (TRUE)
+  {
+    result= file->rnd_next(buf);
+    if (!result)
+    {
+      m_last_part= part_id;
+      m_part_spec.start_part= part_id;
+      table->status= 0;
+      DBUG_RETURN(0);
+    }
+
+    /*
+      if we get here, then the current partition rnd_next returned failure
+    */
+    if (result == HA_ERR_RECORD_DELETED)
+      continue;                                 // Probably MyISAM
+
+    if (result != HA_ERR_END_OF_FILE)
+      break;                                    // Return error
+
+    /* End current partition */
+    late_extra_no_cache(part_id);
+    DBUG_PRINT("info", ("rnd_end on partition %d", part_id));
+    if ((result= file->ha_rnd_end()))
+      break;
+
+    /* Shift to next partition */
+    while (++part_id < m_tot_parts &&
+           !bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      ;
+    if (part_id >= m_tot_parts)
+    {
+      result= HA_ERR_END_OF_FILE;
+      break;
+    }
+    file= m_file[part_id];
+    DBUG_PRINT("info", ("rnd_init on partition %d", part_id));
+    if ((result= file->ha_rnd_init(1)))
+      break;
+    late_extra_cache(part_id);
+  }
+
+end:
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  table->status= STATUS_NOT_FOUND;
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Save position of current row
+
+  SYNOPSIS
+    position()
+    record             Current record in MySQL Row Format
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    position() is called after each call to rnd_next() if the data needs
+    to be ordered. You can do something like the following to store
+    the position:
+    ha_store_ptr(ref, ref_length, current_position);
+
+    The server uses ref to store data. ref_length in the above case is
+    the size needed to store current_position. ref is just a byte array
+    that the server will maintain. If you are using offsets to mark rows,
+    then current_position should be the offset. If it is a primary key,
+    like in BDB, then it needs to be the primary key.
+
+    Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
+*/
+
+void ha_partition::position(const byte *record)
+{
+  handler *file= m_file[m_last_part];
+  DBUG_ENTER("ha_partition::position");
+
+  file->position(record);
+  int2store(ref, m_last_part);
+  memcpy((ref + PARTITION_BYTES_IN_POS), file->ref,
+         (ref_length - PARTITION_BYTES_IN_POS));
+
+#ifdef SUPPORTING_PARTITION_OVER_DIFFERENT_ENGINES
+#ifdef HAVE_purify
+  bzero(ref + PARTITION_BYTES_IN_POS + ref_length,
+        max_ref_length-ref_length);
+#endif /* HAVE_purify */
+#endif
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Read row using position
+
+  SYNOPSIS
+    rnd_pos()
+    out:buf            Row read in MySQL Row Format
+    position           Position of read row
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    This is like rnd_next, but you are given a position to use
+    to determine the row. The position will be of the type that you stored in
+    ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
+    or position you saved when position() was called.
+    Called from filesort.cc records.cc sql_insert.cc sql_select.cc
+    sql_update.cc.
+*/
+
+int ha_partition::rnd_pos(byte * buf, byte *pos)
+{
+  uint part_id;
+  handler *file;
+  DBUG_ENTER("ha_partition::rnd_pos");
+
+  part_id= uint2korr((const byte *) pos);
+  DBUG_ASSERT(part_id < m_tot_parts);
+  file= m_file[part_id];
+  m_last_part= part_id;
+  DBUG_RETURN(file->rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
+}
+
+
+/****************************************************************************
+                MODULE index scan
+****************************************************************************/
+/*
+  Positions an index cursor to the index specified in the handle. Fetches the
+  row if available.
If the key value is null, begin at the first key of the
+  index.
+
+  There are loads of optimisations possible here for the partition handler.
+  The same optimisations can also be checked for full table scan although
+  only through conditions and not from index ranges.
+  Phase one optimisations:
+    Check if the fields of the partition function are bound. If so only use
+    the single partition it becomes bound to.
+  Phase two optimisations:
+    If it can be deduced through range or list partitioning that only a
+    subset of the partitions are used, then only use those partitions.
+*/
+
+/*
+  Initialise handler before start of index scan
+
+  SYNOPSIS
+    index_init()
+    inx                Index number
+    sorted             Whether rows are to be returned in sorted order
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    index_init is always called before starting index scans (except when
+    starting through index_read_idx and using read_range variants).
+*/
+
+int ha_partition::index_init(uint inx, bool sorted)
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::index_init");
+
+  active_index= inx;
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  m_start_key.length= 0;
+  m_ordered= sorted;
+  m_curr_key_info= table->key_info+inx;
+  include_partition_fields_in_used_fields();
+  file= m_file;
+  do
+  {
+    /* TODO RONM: Change to index_init() when code is stable */
+    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
+      if ((error= (*file)->ha_index_init(inx, sorted)))
+      {
+        DBUG_ASSERT(0);                         // Should never happen
+        break;
+      }
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/*
+  End of index scan
+
+  SYNOPSIS
+    index_end()
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    index_end is called at the end of an index scan to clean up anything
+    that needs cleaning up.
+*/
+
+int ha_partition::index_end()
+{
+  int error= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::index_end");
+
+  active_index= MAX_KEY;
+  m_part_spec.start_part= NO_CURRENT_PART_ID;
+  file= m_file;
+  do
+  {
+    int tmp;
+    /* TODO RONM: Change to index_end() when code is stable */
+    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
+      if ((tmp= (*file)->ha_index_end()))
+        error= tmp;
+  } while (*(++file));
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Read one record in an index scan and start an index scan
+
+  SYNOPSIS
+    index_read()
+    buf                Read row in MySQL Row Format
+    key                Key parts in consecutive order
+    key_len            Total length of key parts
+    find_flag          What type of key condition is used
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    index_read starts a new index scan using a start key. The MySQL Server
+    will check the end key on its own. Thus to function properly the
+    partitioned handler needs to ensure that it delivers records in the sort
+    order of the MySQL Server.
+    index_read can be restarted without calling index_end on the previous
+    index scan and without calling index_init. In this case the index_read
+    is on the same index as the previous index_scan. This is particularly
+    used in conjunction with multi read ranges.
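+
+    For illustration only (not part of this patch), the choice that
+    common_index_read() makes below between the ordered and the
+    unordered strategy reduces to a small predicate; exact_match and
+    full_key correspond to HA_READ_KEY_EXACT and a key_len covering the
+    whole index key (the code below also treats a zero-length key the
+    same way):
+
+      // True when the cheap unordered scan is safe: either no ordering
+      // was requested, or every qualifying row carries an identical
+      // key, so any return order is a sorted order.
+      bool use_unordered_scan(bool ordered_requested,
+                              bool exact_match,
+                              bool full_key)
+      {
+        return !ordered_requested || (exact_match && full_key);
+      }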
+*/ + +int ha_partition::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ENTER("ha_partition::index_read"); + + end_range= 0; + DBUG_RETURN(common_index_read(buf, key, key_len, find_flag)); +} + + +/* + Common routine for a number of index_read variants + + SYNOPSIS + common_index_read + + see index_read for rest +*/ + +int ha_partition::common_index_read(byte *buf, const byte *key, uint key_len, + enum ha_rkey_function find_flag) +{ + int error; + DBUG_ENTER("ha_partition::common_index_read"); + + memcpy((void*)m_start_key.key, key, key_len); + m_start_key.length= key_len; + m_start_key.flag= find_flag; + m_index_scan_type= partition_index_read; + + if ((error= partition_scan_set_up(buf, TRUE))) + { + DBUG_RETURN(error); + } + + if (!m_ordered_scan_ongoing || + (find_flag == HA_READ_KEY_EXACT && + (key_len >= m_curr_key_info->key_length || + key_len == 0))) + { + /* + We use unordered index scan either when read_range is used and flag + is set to not use ordered or when an exact key is used and in this + case all records will be sorted equal and thus the sort order of the + resulting records doesn't matter. + We also use an unordered index scan when the number of partitions to + scan is only one. + The unordered index scan will use the partition set created. + Need to set unordered scan ongoing since we can come here even when + it isn't set. + */ + m_ordered_scan_ongoing= FALSE; + error= handle_unordered_scan_next_partition(buf); + } + else + { + /* + In all other cases we will use the ordered index scan. This will use + the partition set created by the get_partition_set method. + */ + error= handle_ordered_index_scan(buf); + } + DBUG_RETURN(error); +} + + +/* + Start an index scan from leftmost record and return first record + + SYNOPSIS + index_first() + buf Read row in MySQL Row Format + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + index_first() asks for the first key in the index. + This is similar to index_read except that there is no start key since + the scan starts from the leftmost entry and proceeds forward with + index_next. + + Called from opt_range.cc, opt_sum.cc, sql_handler.cc, + and sql_select.cc. +*/ + +int ha_partition::index_first(byte * buf) +{ + DBUG_ENTER("ha_partition::index_first"); + + end_range= 0; + m_index_scan_type= partition_index_first; + DBUG_RETURN(common_first_last(buf)); +} + + +/* + Start an index scan from rightmost record and return first record + + SYNOPSIS + index_last() + buf Read row in MySQL Row Format + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + index_last() asks for the last key in the index. + This is similar to index_read except that there is no start key since + the scan starts from the rightmost entry and proceeds forward with + index_prev. + + Called from opt_range.cc, opt_sum.cc, sql_handler.cc, + and sql_select.cc. 
+*/ + +int ha_partition::index_last(byte * buf) +{ + DBUG_ENTER("ha_partition::index_last"); + + m_index_scan_type= partition_index_last; + DBUG_RETURN(common_first_last(buf)); +} + +/* + Common routine for index_first/index_last + + SYNOPSIS + common_index_first_last + + see index_first for rest +*/ + +int ha_partition::common_first_last(byte *buf) +{ + int error; + + if ((error= partition_scan_set_up(buf, FALSE))) + return error; + if (!m_ordered_scan_ongoing) + return handle_unordered_scan_next_partition(buf); + return handle_ordered_index_scan(buf); +} + + +/* + Perform index read using index where always only one row is returned + + SYNOPSIS + index_read_idx() + see index_read for rest of parameters and return values + + DESCRIPTION + Positions an index cursor to the index specified in key. Fetches the + row if any. This is only used to read whole keys. + TODO: Optimise this code to avoid index_init and index_end +*/ + +int ha_partition::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, + enum ha_rkey_function find_flag) +{ + int res; + DBUG_ENTER("ha_partition::index_read_idx"); + + index_init(index, 0); + res= index_read(buf, key, key_len, find_flag); + index_end(); + DBUG_RETURN(res); +} + + +/* + Read last using key + + SYNOPSIS + index_read_last() + buf Read row in MySQL Row Format + key Key + keylen Length of key + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + This is used in join_read_last_key to optimise away an ORDER BY. + Can only be used on indexes supporting HA_READ_ORDER +*/ + +int ha_partition::index_read_last(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_read_last"); + + m_ordered= TRUE; // Safety measure + DBUG_RETURN(index_read(buf, key, keylen, HA_READ_PREFIX_LAST)); +} + + +/* + Read next record in a forward index scan + + SYNOPSIS + index_next() + buf Read row in MySQL Row Format + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + Used to read forward through the index. +*/ + +int ha_partition::index_next(byte * buf) +{ + DBUG_ENTER("ha_partition::index_next"); + + /* + TODO(low priority): + If we want partition to work with the HANDLER commands, we + must be able to do index_last() -> index_prev() -> index_next() + */ + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + { + DBUG_RETURN(handle_unordered_next(buf, FALSE)); + } + DBUG_RETURN(handle_ordered_next(buf, FALSE)); +} + + +/* + Read next record special + + SYNOPSIS + index_next_same() + buf Read row in MySQL Row Format + key Key + keylen Length of key + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + This routine is used to read the next but only if the key is the same + as supplied in the call. +*/ + +int ha_partition::index_next_same(byte *buf, const byte *key, uint keylen) +{ + DBUG_ENTER("ha_partition::index_next_same"); + + DBUG_ASSERT(keylen == m_start_key.length); + DBUG_ASSERT(m_index_scan_type != partition_index_last); + if (!m_ordered_scan_ongoing) + DBUG_RETURN(handle_unordered_next(buf, TRUE)); + DBUG_RETURN(handle_ordered_next(buf, TRUE)); +} + + +/* + Read next record when performing index scan backwards + + SYNOPSIS + index_prev() + buf Read row in MySQL Row Format + + RETURN VALUE + >0 Error code + 0 Success + + DESCRIPTION + Used to read backwards through the index. 
+*/
+
+int ha_partition::index_prev(byte * buf)
+{
+  DBUG_ENTER("ha_partition::index_prev");
+
+  /* TODO: read comment in index_next */
+  DBUG_ASSERT(m_index_scan_type != partition_index_first);
+  DBUG_RETURN(handle_ordered_prev(buf));
+}
+
+
+/*
+  Start a read of one range with start and end key
+
+  SYNOPSIS
+    read_range_first()
+    start_key          Specification of start key
+    end_key            Specification of end key
+    eq_range_arg       Is it equal range
+    sorted             Should records be returned in sorted order
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    We reimplement read_range_first since we don't want the compare_key
+    check at the end. This is already performed in the partition handler.
+    read_range_next is very different since we need to scan all
+    underlying handlers.
+*/
+
+int ha_partition::read_range_first(const key_range *start_key,
+                                   const key_range *end_key,
+                                   bool eq_range_arg, bool sorted)
+{
+  int error;
+  DBUG_ENTER("ha_partition::read_range_first");
+
+  m_ordered= sorted;
+  eq_range= eq_range_arg;
+  end_range= 0;
+  if (end_key)
+  {
+    end_range= &save_end_range;
+    save_end_range= *end_key;
+    key_compare_result_on_equal=
+      ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
+       (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
+  }
+  range_key_part= m_curr_key_info->key_part;
+
+  if (!start_key)                               // Read first record
+  {
+    m_index_scan_type= partition_index_first;
+    error= common_first_last(m_rec0);
+  }
+  else
+  {
+    error= common_index_read(m_rec0,
+                             start_key->key,
+                             start_key->length, start_key->flag);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Read next record in read of a range with start and end key
+
+  SYNOPSIS
+    read_range_next()
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+*/
+
+int ha_partition::read_range_next()
+{
+  DBUG_ENTER("ha_partition::read_range_next");
+
+  if (m_ordered)
+  {
+    DBUG_RETURN(handler::read_range_next());
+  }
+  DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+}
+
+
+/*
+  Common routine to set up scans
+
+  SYNOPSIS
+    partition_scan_set_up()
+    buf                Buffer to later return record in
+    idx_read_flag      Is it index scan
+
+  RETURN VALUE
+    >0                 Error code
+    0                  Success
+
+  DESCRIPTION
+    This is where we check which partitions to actually scan if not all
+    of them
+*/
+
+int ha_partition::partition_scan_set_up(byte * buf, bool idx_read_flag)
+{
+  DBUG_ENTER("ha_partition::partition_scan_set_up");
+
+  if (idx_read_flag)
+    get_partition_set(table,buf,active_index,&m_start_key,&m_part_spec);
+  else
+  {
+    m_part_spec.start_part= 0;
+    m_part_spec.end_part= m_tot_parts - 1;
+  }
+  if (m_part_spec.start_part > m_part_spec.end_part)
+  {
+    /*
+      We discovered a partition set but the set was empty so we report
+      key not found.
+    */
+    DBUG_PRINT("info", ("scan with no partition to scan"));
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+  if (m_part_spec.start_part == m_part_spec.end_part)
+  {
+    /*
+      We discovered a single partition to scan, this never needs to be
+      performed using the ordered index scan.
+    */
+    DBUG_PRINT("info", ("index scan using the single partition %d",
+                        m_part_spec.start_part));
+    m_ordered_scan_ongoing= FALSE;
+  }
+  else
+  {
+    /*
+      Set m_ordered_scan_ongoing according to how the scan should be done.
+      Only exact partitions are discovered at the moment by
+      get_partition_set. Verify this; also the bitmap must have at least
+      one bit set, otherwise the result from this table is the empty set.
+    */
+    uint start_part= bitmap_get_first_set(&(m_part_info->used_partitions));
+    if (start_part == MY_BIT_NONE)
+    {
+      DBUG_PRINT("info", ("scan with no partition to scan"));
+      DBUG_RETURN(HA_ERR_END_OF_FILE);
+    }
+    if (start_part > m_part_spec.start_part)
+      m_part_spec.start_part= start_part;
+    DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
+    m_ordered_scan_ongoing= m_ordered;
+  }
+  DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
+              m_part_spec.end_part < m_tot_parts);
+  DBUG_RETURN(0);
+}
+
+
+/****************************************************************************
+  Unordered Index Scan Routines
+****************************************************************************/
+/*
+  Common routine to handle index_next with unordered results
+
+  SYNOPSIS
+    handle_unordered_next()
+    out:buf                       Read row in MySQL Row Format
+    next_same                     Called from index_next_same
+
+  RETURN VALUE
+    HA_ERR_END_OF_FILE            End of scan
+    0                             Success
+    other                         Error code
+
+  DESCRIPTION
+    These routines are used to scan partitions without considering order.
+    This is performed in two situations.
+    1) In read_multi_range this is the normal case
+    2) When performing any type of index_read, index_first, index_last where
+       all fields in the partition function are bound. In this case the
+       index scan is performed on only one partition and thus it isn't
+       necessary to perform any sort.
+*/
+
+int ha_partition::handle_unordered_next(byte *buf, bool is_next_same)
+{
+  handler *file= m_file[m_part_spec.start_part];
+  int error;
+  DBUG_ENTER("ha_partition::handle_unordered_next");
+
+  /*
+    We should consider if this should be split into two functions as
+    next_same is always a local constant
+  */
+  if (is_next_same)
+  {
+    if (!(error= file->index_next_same(buf, m_start_key.key,
+                                       m_start_key.length)))
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);
+    }
+  }
+  else if (!(error= file->index_next(buf)))
+  {
+    if (compare_key(end_range) <= 0)
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);                           // Row was in range
+    }
+    error= HA_ERR_END_OF_FILE;
+  }
+
+  if (error == HA_ERR_END_OF_FILE)
+  {
+    m_part_spec.start_part++;                   // Start using next part
+    error= handle_unordered_scan_next_partition(buf);
+  }
+  DBUG_RETURN(error);
+}
+
+
+/*
+  Handle index_next when changing to new partition
+
+  SYNOPSIS
+    handle_unordered_scan_next_partition()
+    buf                           Read row in MySQL Row Format
+
+  RETURN VALUE
+    HA_ERR_END_OF_FILE            End of scan
+    0                             Success
+    other                         Error code
+
+  DESCRIPTION
+    This routine is used to start the index scan on the next partition.
+    Both initial start and after completing scan on one partition.
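+
+    As a self-contained sketch (illustration only, not the server
+    code), the loop below amounts to: for each used partition, issue
+    the start call, keep the first hit, and treat "no rows here" as a
+    cue to try the next partition:
+
+      #include <cstddef>
+      #include <vector>
+
+      enum StartResult { ROW_FOUND, NO_ROWS, HARD_ERROR };
+
+      struct PartScan
+      {
+        virtual StartResult start_scan(unsigned char *row)= 0;
+      };
+
+      // Returns the index of the partition that produced a row,
+      // -1 on end of file, -2 on a hard error.
+      int next_partition(std::vector<PartScan*> &used, unsigned char *row)
+      {
+        for (std::size_t i= 0; i < used.size(); i++)
+        {
+          StartResult res= used[i]->start_scan(row);
+          if (res == ROW_FOUND)
+            return (int) i;                     // first partition with a row
+          if (res == HARD_ERROR)
+            return -2;                          // propagate real errors
+          // NO_ROWS: fall through and try the next partition
+        }
+        return -1;                              // every partition empty
+      }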
+*/ + +int ha_partition::handle_unordered_scan_next_partition(byte * buf) +{ + uint i; + DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition"); + + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + int error; + handler *file; + + if (!(bitmap_is_set(&(m_part_info->used_partitions), i))) + continue; + file= m_file[i]; + m_part_spec.start_part= i; + switch (m_index_scan_type) { + case partition_index_read: + DBUG_PRINT("info", ("index_read on partition %d", i)); + error= file->index_read(buf, m_start_key.key, + m_start_key.length, + m_start_key.flag); + break; + case partition_index_first: + DBUG_PRINT("info", ("index_first on partition %d", i)); + error= file->index_first(buf); + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(1); + } + if (!error) + { + if (compare_key(end_range) <= 0) + { + m_last_part= i; + DBUG_RETURN(0); + } + error= HA_ERR_END_OF_FILE; + } + if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND)) + DBUG_RETURN(error); + DBUG_PRINT("info", ("HA_ERR_END_OF_FILE on partition %d", i)); + } + m_part_spec.start_part= NO_CURRENT_PART_ID; + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +/* + Common routine to start index scan with ordered results + + SYNOPSIS + handle_ordered_index_scan() + out:buf Read row in MySQL Row Format + + RETURN VALUE + HA_ERR_END_OF_FILE End of scan + 0 Success + other Error code + + DESCRIPTION + This part contains the logic to handle index scans that require ordered + output. This includes all except those started by read_range_first with + the flag ordered set to FALSE. Thus most direct index_read and all + index_first and index_last. + + We implement ordering by keeping one record plus a key buffer for each + partition. Every time a new entry is requested we will fetch a new + entry from the partition that is currently not filled with an entry. + Then the entry is put into its proper sort position. + + Returning a record is done by getting the top record, copying the + record to the request buffer and setting the partition as empty on + entries. +*/ + +int ha_partition::handle_ordered_index_scan(byte *buf) +{ + uint i; + uint j= 0; + bool found= FALSE; + bool reverse_order= FALSE; + DBUG_ENTER("ha_partition::handle_ordered_index_scan"); + + m_top_entry= NO_CURRENT_PART_ID; + queue_remove_all(&m_queue); + + DBUG_PRINT("info", ("m_part_spec.start_part %d", m_part_spec.start_part)); + for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++) + { + if (!(bitmap_is_set(&(m_part_info->used_partitions), i))) + continue; + byte *rec_buf_ptr= rec_buf(i); + int error; + handler *file= m_file[i]; + + switch (m_index_scan_type) { + case partition_index_read: + error= file->index_read(rec_buf_ptr, + m_start_key.key, + m_start_key.length, + m_start_key.flag); + reverse_order= FALSE; + break; + case partition_index_first: + error= file->index_first(rec_buf_ptr); + reverse_order= FALSE; + break; + case partition_index_last: + error= file->index_last(rec_buf_ptr); + reverse_order= TRUE; + break; + default: + DBUG_ASSERT(FALSE); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + if (!error) + { + found= TRUE; + /* + Initialise queue without order first, simply insert + */ + queue_element(&m_queue, j++)= (byte*)queue_buf(i); + } + else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + DBUG_RETURN(error); + } + } + if (found) + { + /* + We found at least one partition with data, now sort all entries and + after that read the first entry and copy it to the buffer to return in. 
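+
+      For illustration (hypothetical keys): if partitions p0, p1 and p2
+      delivered first keys 7, 2 and 9, queue_fix() below leaves p1's
+      entry on top and return_top_record() hands back the row with key 2.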
+ */ + queue_set_max_at_top(&m_queue, reverse_order); + queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info); + m_queue.elements= j; + queue_fix(&m_queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); + } + DBUG_RETURN(HA_ERR_END_OF_FILE); +} + + +/* + Return the top record in sort order + + SYNOPSIS + return_top_record() + out:buf Row returned in MySQL Row Format + + RETURN VALUE + NONE +*/ + +void ha_partition::return_top_record(byte *buf) +{ + uint part_id; + byte *key_buffer= queue_top(&m_queue); + byte *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS; + + part_id= uint2korr(key_buffer); + memcpy(buf, rec_buffer, m_rec_length); + m_last_part= part_id; + m_top_entry= part_id; +} + + +/* + Common routine to handle index_next with ordered results + + SYNOPSIS + handle_ordered_next() + out:buf Read row in MySQL Row Format + next_same Called from index_next_same + + RETURN VALUE + HA_ERR_END_OF_FILE End of scan + 0 Success + other Error code +*/ + +int ha_partition::handle_ordered_next(byte *buf, bool is_next_same) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_next"); + + if (!is_next_same) + error= file->index_next(rec_buf(part_id)); + else + error= file->index_next_same(rec_buf(part_id), m_start_key.key, + m_start_key.length); + if (error) + { + if (error == HA_ERR_END_OF_FILE) + { + /* Return next buffered row */ + queue_remove(&m_queue, (uint) 0); + if (m_queue.elements) + { + DBUG_PRINT("info", ("Record returned from partition %u (2)", + m_top_entry)); + return_top_record(buf); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&m_queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry)); + DBUG_RETURN(0); +} + + +/* + Common routine to handle index_prev with ordered results + + SYNOPSIS + handle_ordered_prev() + out:buf Read row in MySQL Row Format + + RETURN VALUE + HA_ERR_END_OF_FILE End of scan + 0 Success + other Error code +*/ + +int ha_partition::handle_ordered_prev(byte *buf) +{ + int error; + uint part_id= m_top_entry; + handler *file= m_file[part_id]; + DBUG_ENTER("ha_partition::handle_ordered_prev"); + + if ((error= file->index_prev(rec_buf(part_id)))) + { + if (error == HA_ERR_END_OF_FILE) + { + queue_remove(&m_queue, (uint) 0); + if (m_queue.elements) + { + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d (2)", + m_top_entry)); + error= 0; + } + } + DBUG_RETURN(error); + } + queue_replaced(&m_queue); + return_top_record(buf); + DBUG_PRINT("info", ("Record returned from partition %d", m_top_entry)); + DBUG_RETURN(0); +} + + +/* + Set fields in partition functions in read set for underlying handlers + + SYNOPSIS + include_partition_fields_in_used_fields() + + RETURN VALUE + NONE + + DESCRIPTION + Some handlers only read fields as specified by the bitmap for the + read set. For partitioned handlers we always require that the + fields of the partition functions are read such that we can + calculate the partition id to place updated and deleted records. 
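+
+    For example (hypothetical table): with PARTITION BY HASH (YEAR(d)),
+    the column d is forced into the read set so that the partition id of
+    a row can be recomputed when the row is updated or deleted.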
+*/
+
+void ha_partition::include_partition_fields_in_used_fields()
+{
+  Field **ptr= m_part_field_array;
+  DBUG_ENTER("ha_partition::include_partition_fields_in_used_fields");
+
+  do
+  {
+    ha_set_bit_in_read_set((*ptr)->fieldnr);
+  } while (*(++ptr));
+  DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+                MODULE information calls
+****************************************************************************/
+
+/*
+  These are all first approximations of the extra, info, scan_time
+  and read_time calls
+*/
+
+/*
+  General method to gather info from handler
+
+  SYNOPSIS
+    info()
+    flag              Specifies what info is requested
+
+  RETURN VALUE
+    NONE
+
+  DESCRIPTION
+    ::info() is used to return information to the optimizer.
+    Currently this table handler doesn't implement most of the fields
+    really needed. SHOW also makes use of this data.
+    Another note: if your handler doesn't provide an exact record count,
+    you will probably want to have the following in your code:
+      if (records < 2)
+        records = 2;
+    The reason is that the server will optimize for cases of only a single
+    record. If in a table scan you don't know the number of records
+    it will probably be better to set records to two so you can return
+    as many records as you need.
+
+    Along with records a few more variables you may wish to set are:
+      records
+      deleted
+      data_file_length
+      index_file_length
+      delete_length
+      check_time
+    Take a look at the public variables in handler.h for more information.
+
+    Called in:
+      filesort.cc
+      ha_heap.cc
+      item_sum.cc
+      opt_sum.cc
+      sql_delete.cc
+      sql_delete.cc
+      sql_derived.cc
+      sql_select.cc
+      sql_select.cc
+      sql_select.cc
+      sql_select.cc
+      sql_select.cc
+      sql_show.cc
+      sql_show.cc
+      sql_show.cc
+      sql_show.cc
+      sql_table.cc
+      sql_union.cc
+      sql_update.cc
+
+    Some flags that are not implemented
+      HA_STATUS_POS:
+        This parameter is never used from the MySQL Server. It is checked
+        in one place in MyISAM so it could potentially be used by MyISAM
+        specific programs.
+      HA_STATUS_NO_LOCK:
+        This is declared and often used. It's only used by MyISAM.
+        It means that MySQL doesn't need the absolute latest statistics
+        information. This may save the handler from doing internal locks while
+        retrieving statistics data.
+*/
+
+void ha_partition::info(uint flag)
+{
+  handler *file, **file_array;
+  DBUG_ENTER("ha_partition::info");
+
+  if (flag & HA_STATUS_AUTO)
+  {
+    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
+    /*
+      The auto increment value is only maintained by the first handler
+      so we will only call this.
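+      (get_auto_increment() below follows the same convention and simply
+      returns m_file[0]->get_auto_increment().)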
+    */
+    m_file[0]->info(HA_STATUS_AUTO);
+  }
+  if (flag & HA_STATUS_VARIABLE)
+  {
+    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
+    /*
+      Calculates statistical variables
+      records:           Estimate of number of records in table
+                         We report sum (always at least 2)
+      deleted:           Estimate of number of holes in the table due to
+                         deletes
+                         We report sum
+      data_file_length:  Length of data file, in principle bytes in table
+                         We report sum
+      index_file_length: Length of index file, in principle bytes in
+                         indexes in the table
+                         We report sum
+      mean_record_length:Mean record length in the table
+                         We calculate this
+      check_time:        Time of last check (only applicable to MyISAM)
+                         We report last time of all underlying handlers
+    */
+    records= 0;
+    deleted= 0;
+    data_file_length= 0;
+    index_file_length= 0;
+    check_time= 0;
+    file_array= m_file;
+    do
+    {
+      if (bitmap_is_set(&(m_part_info->used_partitions), (file_array - m_file)))
+      {
+        file= *file_array;
+        file->info(HA_STATUS_VARIABLE);
+        records+= file->records;
+        deleted+= file->deleted;
+        data_file_length+= file->data_file_length;
+        index_file_length+= file->index_file_length;
+        if (file->check_time > check_time)
+          check_time= file->check_time;
+      }
+    } while (*(++file_array));
+    if (records < 2 &&
+        m_table_flags & HA_NOT_EXACT_COUNT)
+      records= 2;
+    if (records > 0)
+      mean_rec_length= (ulong) (data_file_length / records);
+    else
+      mean_rec_length= 1; //? What should we set here
+  }
+  if (flag & HA_STATUS_CONST)
+  {
+    DBUG_PRINT("info", ("HA_STATUS_CONST"));
+    /*
+      Recalculate loads of constant variables. MyISAM also sets things
+      directly on the table share object.
+
+      Check whether this should be fixed since handlers should not
+      change things directly on the table object.
+
+      Monty comment: This should NOT be changed! It's the handler's
+      responsibility to correct table->s->keys_xxxx information if keys
+      have been disabled.
+
+      The most important parameters set here are records per key on
+      all indexes, block_size and the primary key ref_length.
+
+      For each index there is an array of rec_per_key.
+      As an example if we have an index with three attributes a, b and c
+      we will have an array of 3 rec_per_key.
+      rec_per_key[0] is an estimate of number of records divided by
+      number of unique values of the field a.
+      rec_per_key[1] is an estimate of the number of records divided
+      by the number of unique combinations of the fields a and b.
+      rec_per_key[2] is an estimate of the number of records divided
+      by the number of unique combinations of the fields a, b and c.
+
+      Many handlers only set the value of rec_per_key when all fields
+      are bound (rec_per_key[2] in the example above).
+
+      If the handler doesn't support statistics, it should set all of the
+      above to 0.
+
+      We will allow the first handler to set the rec_per_key and use
+      this as an estimate on the total table.
+
+      max_data_file_length:     Maximum data file length
+                                We ignore it, it is only used in
+                                SHOW TABLE STATUS
+      max_index_file_length:    Maximum index file length
+                                We ignore it since it is never used
+      block_size:               Block size used
+                                We set it to the value of the first handler
+      sortkey:                  Never used at any place so ignored
+      ref_length:               We set this to the value calculated
+                                and stored in local object
+      create_time:              Creation time of table
+                                Set by first handler
+
+      So we calculate these constants by using the variables on the first
+      handler.
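+
+      Worked example (hypothetical numbers): in a table with 1000 rows
+      where field a has 100 distinct values and the combination (a,b)
+      has 500 distinct combinations, rec_per_key[0]= 10 and
+      rec_per_key[1]= 2.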
+    */
+
+    file= m_file[0];
+    file->info(HA_STATUS_CONST);
+    create_time= file->create_time;
+    ref_length= m_ref_length;
+  }
+  if (flag & HA_STATUS_ERRKEY)
+  {
+    handler *file= m_file[m_last_part];
+    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
+    /*
+      This flag is used to get the index number of the unique index that
+      reported a duplicate key.
+      We will report the errkey on the last handler used and ignore the rest.
+    */
+    file->info(HA_STATUS_ERRKEY);
+    if (file->errkey != (uint) -1)
+      errkey= file->errkey;
+  }
+  if (flag & HA_STATUS_TIME)
+  {
+    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
+    /*
+      This flag is used to set the latest update time of the table.
+      Used by SHOW commands.
+      We will report the maximum of these times.
+    */
+    update_time= 0;
+    file_array= m_file;
+    do
+    {
+      file= *file_array;
+      file->info(HA_STATUS_TIME);
+      if (file->update_time > update_time)
+        update_time= file->update_time;
+    } while (*(++file_array));
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+void ha_partition::get_dynamic_partition_info(PARTITION_INFO *stat_info,
+                                              uint part_id)
+{
+  handler *file= m_file[part_id];
+  file->info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+             HA_STATUS_NO_LOCK);
+
+  stat_info->records= file->records;
+  stat_info->mean_rec_length= file->mean_rec_length;
+  stat_info->data_file_length= file->data_file_length;
+  stat_info->max_data_file_length= file->max_data_file_length;
+  stat_info->index_file_length= file->index_file_length;
+  stat_info->delete_length= file->delete_length;
+  stat_info->create_time= file->create_time;
+  stat_info->update_time= file->update_time;
+  stat_info->check_time= file->check_time;
+  stat_info->check_sum= 0;
+  if (file->table_flags() & (ulong) HA_HAS_CHECKSUM)
+    stat_info->check_sum= file->checksum();
+  return;
+}
+
+
+/*
+  General function to prepare handler for certain behavior
+
+  SYNOPSIS
+    extra()
+    operation              Operation type for extra call
+
+  RETURN VALUE
+    >0                     Error code
+    0                      Success
+
+  DESCRIPTION
+    extra() is called whenever the server wishes to send a hint to
+    the storage engine. The MyISAM engine implements the most hints.
+
+    We divide the parameters into the following categories:
+    1) Parameters used by most handlers
+    2) Parameters used by some non-MyISAM handlers
+    3) Parameters used only by MyISAM
+    4) Parameters only used by temporary tables for query processing
+    5) Parameters only used by MyISAM internally
+    6) Parameters not used at all
+
+    The partition handler needs to handle categories 1), 2) and 3).
+
+  1) Parameters used by most handlers
+  -----------------------------------
+  HA_EXTRA_RESET:
+    This option is used by most handlers and it resets the handler state
+    to the same state as after an open call. This includes releasing
+    any READ CACHE or WRITE CACHE or other internal buffers used.
+
+    It is called from the reset method in the handler interface. There are
+    three instances where this is called.
+    1) After completing an INSERT ... SELECT ... query the handler for the
+       table inserted into is reset
+    2) It is called from close_thread_table which in turn is called from
+       close_thread_tables except in the case where the tables are locked
+       in which case ha_commit_stmt is called instead.
+       It is only called from here if flush_version hasn't changed and the
+       table is not an old table when calling close_thread_table.
+       close_thread_tables is called from many places as a general clean up
+       function after completing a query.
+    3) It is called when deleting the QUICK_RANGE_SELECT object if the
+       QUICK_RANGE_SELECT object had its own handler object. It is called
+       immediately before the close of this local handler object.
+  HA_EXTRA_KEYREAD:
+  HA_EXTRA_NO_KEYREAD:
+    These parameters are used to provide an optimisation hint to the handler.
+    If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
+    many handlers this means that the index-only scans can be used and it
+    is not necessary to use the real records to satisfy this part of the
+    query. Index-only scans are a very important optimisation for disk-based
+    indexes. For main-memory indexes most indexes contain a reference to the
+    record and thus KEYREAD only says that it is enough to read key fields.
+    HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
+    will disable this option.
+    The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
+    feature is supported.
+  HA_EXTRA_FLUSH:
+    Indication to flush tables to disk, called at close_thread_table to
+    ensure disk based tables are flushed at end of query execution.
+
+  2) Parameters used by some non-MyISAM handlers
+  ----------------------------------------------
+  HA_EXTRA_RETRIEVE_ALL_COLS:
+    Many handlers have implemented optimisations to avoid fetching all
+    fields when retrieving data. In certain situations all fields need
+    to be retrieved even though the query_id is not set on all field
+    objects.
+
+    It is called from copy_data_between_tables where all fields are
+    copied without setting query_id before calling the handlers.
+    It is called from UPDATE statements when the fields of the index
+    used are updated or ORDER BY is used with UPDATE.
+    And finally when calculating the checksum of a table using the
+    CHECKSUM command.
+  HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+    In some situations it is mandatory to retrieve primary key fields
+    independent of the query id's. This extra flag specifies that fetch
+    of primary key fields is mandatory.
+  HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+    This is a strictly InnoDB feature that is more or less undocumented.
+    When it is activated InnoDB copies field by field from its fetch
+    cache instead of all fields in one memcpy. Have no idea what the
+    purpose of this is.
+    Cut from include/my_base.h:
+    When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
+    other fields intact. When this is off (by default) InnoDB will use memcpy
+    to overwrite entire row.
+  HA_EXTRA_IGNORE_DUP_KEY:
+  HA_EXTRA_NO_IGNORE_DUP_KEY:
+    Informs the handler that we will not stop the transaction if we get
+    duplicate key errors during insert/update.
+    Always called in pairs, triggered by INSERT IGNORE and other similar
+    SQL constructs.
+    Not used by MyISAM.
+
+  3) Parameters used only by MyISAM
+  ---------------------------------
+  HA_EXTRA_NORMAL:
+    Only used in MyISAM to reset quick mode, not implemented by any other
+    handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
+
+    It is called after completing a successful DELETE query if the QUICK
+    option is set.
+
+  HA_EXTRA_QUICK:
+    When the user does DELETE QUICK FROM table where-clause; this extra
+    option is called before the delete query is performed and
+    HA_EXTRA_NORMAL is called after the delete query is completed.
+    Temporary tables used internally in MySQL always set this option.
+
+    The meaning of quick mode is that when deleting in a B-tree no merging
+    of leaves is performed.
+    This is a common method and many large DBMS's actually only support
+    this quick mode since it is very difficult to merge leaves in a tree
+    used by many threads concurrently.
+
+  HA_EXTRA_CACHE:
+    This flag is usually set with extra_opt along with a cache size.
+    The size of this buffer is set by the user variable
+    record_buffer_size. The value of this cache size is the amount of
+    data read from disk in each fetch when performing a table scan.
+    This means that before scanning a table it is normal to call
+    extra with HA_EXTRA_CACHE and when the scan is completed to call
+    HA_EXTRA_NO_CACHE to release the cache memory.
+
+    Some special care is taken when using this extra parameter since there
+    could be a write ongoing on the table in the same statement. In this
+    case one has to take special care since there might be a WRITE CACHE
+    as well. HA_EXTRA_CACHE specifies using a READ CACHE and using
+    READ CACHE and WRITE CACHE at the same time is not possible.
+
+    Only MyISAM currently uses this option.
+
+    It is set when doing full table scans using rr_sequential and
+    reset when completing such a scan with end_read_record
+    (resetting means calling extra with HA_EXTRA_NO_CACHE).
+
+    It is set in filesort.cc for MyISAM internal tables and it is set in
+    a multi-update where HA_EXTRA_CACHE is called on a temporary result
+    table and after that ha_rnd_init(0) on the table to be updated
+    and immediately after that HA_EXTRA_NO_CACHE on the table to be updated.
+
+    Apart from that it is always used from init_read_record but not when
+    used from UPDATE statements. It is not used from DELETE statements
+    with ORDER BY and LIMIT but it is used in the normal scan loop in
+    DELETE statements. The reason here is that DELETE's in MyISAM don't
+    move existing data rows.
+
+    It is also set in copy_data_between_tables when scanning the old table
+    to copy over to the new table.
+    And it is set in join_init_read_record where quick objects are used
+    to perform a scan on the table. In this case the full table scan can
+    even be performed multiple times as part of the nested loop join.
+
+    For purposes of the partition handler it is obviously necessary to have
+    special treatment of this extra call. If we would simply pass this
+    extra call down to each handler we would allocate
+    cache size * no of partitions amount of memory and this is not
+    necessary since we will only scan one partition at a time when doing
+    full table scans.
+
+    Thus we treat it by first checking whether we have MyISAM handlers in
+    the table, if not we simply ignore the call and if we have we will
+    record the call but will not call any underlying handler yet. Then
+    when performing the sequential scan we will check this recorded value
+    and call extra_opt whenever we start scanning a new partition.
+
+    monty: Needs to be fixed so that it's passed to all handlers when we
+    move to another partition during table scan.
+
+  HA_EXTRA_NO_CACHE:
+    When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
+    flush method in the select_union class.
+    It is used to some extent when doing INSERT DELAYED inserts.
+    See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
+
+    It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
+    if they are MyISAM handlers. For other handlers we can ignore the
+    call. If no cache is in use they will quickly return after finding
+    this out. And we also ensure that all caches are disabled and none
+    is left by mistake.
+    In the future this call will probably be deleted and we will instead
+    call ::reset();
+
+  HA_EXTRA_WRITE_CACHE:
+    See above, called from various places. It is mostly used when we
+    do INSERT ... SELECT
+    No special handling to save cache space is developed currently.
+
+  HA_EXTRA_PREPARE_FOR_UPDATE:
+    This is called as part of a multi-table update. When the table to be
+    updated is also scanned then this informs the MyISAM handler to drop
+    any caches if dynamic records are used (fixed size records do not care
+    about this call). We pass this along to all underlying MyISAM handlers
+    and ignore it for the rest.
+
+  HA_EXTRA_PREPARE_FOR_DELETE:
+    Only used by MyISAM, called in preparation for a DROP TABLE.
+    It's used mostly on Windows, which cannot handle dropping an open file.
+    On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
+
+  HA_EXTRA_READCHECK:
+  HA_EXTRA_NO_READCHECK:
+    Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
+    this is not needed in SQL. The reason for this call is that MyISAM sets
+    the READ_CHECK_USED in the open call so the call is needed for MyISAM
+    to reset this feature.
+    The idea with this parameter was to inform of doing/not doing a read
+    check before applying an update. Since SQL always performs a read before
+    applying the update, no read check is needed in MyISAM either.
+
+    This is a cut from Docs/myisam.txt
+    Sometimes you might want to force an update without checking whether
+    another user has changed the record since you last read it. This is
+    somewhat dangerous, so it should ideally not be used. That can be
+    accomplished by wrapping the mi_update() call in two calls to mi_extra(),
+    using these functions:
+    HA_EXTRA_NO_READCHECK=5                 No readcheck on update
+    HA_EXTRA_READCHECK=6                    Use readcheck (def)
+
+  HA_EXTRA_FORCE_REOPEN:
+    Only used by MyISAM, called when altering a table and when closing
+    tables, to enforce a reopen of the table files.
+
+  4) Parameters only used by temporary tables for query processing
+  ----------------------------------------------------------------
+  HA_EXTRA_RESET_STATE:
+    Same as HA_EXTRA_RESET except that buffers are not released. If there is
+    a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
+    or to change the type of cache between READ CACHE and WRITE CACHE.
+
+    This extra function is always called immediately before calling
+    delete_all_rows on the handler for temporary tables.
+    There are cases however when HA_EXTRA_RESET_STATE isn't called in
+    a similar case for a temporary table in sql_union.cc and in two other
+    cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
+    is called afterwards.
+    The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
+    disable caching, delete all rows and enable WRITE CACHE. This is
+    used for temporary tables containing distinct sums and a
+    functional group.
+
+    The only case where delete_all_rows is called on non-temporary tables
+    is in sql_delete.cc when DELETE FROM table; is called by a user.
+    In this case no special extra calls are performed before or after this
+    call.
+
+    The partition handler should not need to bother about this one. It
+    should never be called.
+
+  HA_EXTRA_NO_ROWS:
+    Don't insert rows indication to HEAP and MyISAM, only used by temporary
+    tables used in query processing.
+    Not handled by the partition handler.
+
+  5) Parameters only used by MyISAM internally
+  --------------------------------------------
+  HA_EXTRA_REINIT_CACHE:
+    This call reinitialises the READ CACHE described above if there is one
+    and otherwise the call is ignored.
+
+    We can thus safely call it on all underlying handlers if they are
+    MyISAM handlers. It is however never called so we don't handle it at all.
+  HA_EXTRA_FLUSH_CACHE:
+    Flush WRITE CACHE in MyISAM. It is only called from one place in the
+    code. This is in sql_insert.cc where it is called if the table_flags
+    doesn't contain HA_DUPP_POS. The only handler having the HA_DUPP_POS
+    set is the MyISAM handler and so the only handler not receiving this
+    call is MyISAM.
+    Thus in effect this call is called but never used. Could be removed
+    from sql_insert.cc
+  HA_EXTRA_NO_USER_CHANGE:
+    Only used by MyISAM, never called.
+    Simulates lock_type as locked.
+  HA_EXTRA_WAIT_LOCK:
+  HA_EXTRA_WAIT_NOLOCK:
+    Only used by MyISAM, called from the MyISAM handler but never from the
+    server code on top of the handler.
+    Sets lock_wait on/off
+  HA_EXTRA_NO_KEYS:
+    Only used by MyISAM, only used internally in the MyISAM handler, never
+    called from the server level.
+  HA_EXTRA_KEYREAD_CHANGE_POS:
+  HA_EXTRA_REMEMBER_POS:
+  HA_EXTRA_RESTORE_POS:
+  HA_EXTRA_PRELOAD_BUFFER_SIZE:
+  HA_EXTRA_CHANGE_KEY_TO_DUP:
+  HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
+    Only used by MyISAM, never called.
+
+  6) Parameters not used at all
+  -----------------------------
+  HA_EXTRA_KEY_CACHE:
+  HA_EXTRA_NO_KEY_CACHE:
+    These parameters are no longer used and could be removed.
+*/
+
+int ha_partition::extra(enum ha_extra_function operation)
+{
+  DBUG_ENTER("ha_partition::extra");
+  DBUG_PRINT("info", ("operation: %d", (int) operation));
+
+  switch (operation) {
+    /* Category 1), used by most handlers */
+  case HA_EXTRA_KEYREAD:
+  case HA_EXTRA_NO_KEYREAD:
+  case HA_EXTRA_FLUSH:
+    DBUG_RETURN(loop_extra(operation));
+
+    /* Category 2), used by non-MyISAM handlers */
+  case HA_EXTRA_IGNORE_DUP_KEY:
+  case HA_EXTRA_NO_IGNORE_DUP_KEY:
+  case HA_EXTRA_RETRIEVE_ALL_COLS:
+  case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
+  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+  {
+    if (!m_myisam)
+      DBUG_RETURN(loop_extra(operation));
+    break;
+  }
+
+  /* Category 3), used by MyISAM handlers */
+  case HA_EXTRA_NORMAL:
+  case HA_EXTRA_QUICK:
+  case HA_EXTRA_NO_READCHECK:
+  case HA_EXTRA_PREPARE_FOR_UPDATE:
+  case HA_EXTRA_PREPARE_FOR_DELETE:
+  case HA_EXTRA_FORCE_REOPEN:
+  case HA_EXTRA_FLUSH_CACHE:
+  {
+    if (m_myisam)
+      DBUG_RETURN(loop_extra(operation));
+    break;
+  }
+  case HA_EXTRA_CACHE:
+  {
+    prepare_extra_cache(0);
+    break;
+  }
+  case HA_EXTRA_NO_CACHE:
+  {
+    m_extra_cache= FALSE;
+    m_extra_cache_size= 0;
+    DBUG_RETURN(loop_extra(operation));
+  }
+  default:
+  {
+    /* Temporary crash to discover what is wrong */
+    DBUG_ASSERT(0);
+    break;
+  }
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Special extra call to reset extra parameters
+
+  SYNOPSIS
+    reset()
+
+  RETURN VALUE
+    >0                     Error code
+    0                      Success
+
+  DESCRIPTION
+    This will in the future be called instead of extra(HA_EXTRA_RESET) as this
+    is such a common call
+*/
+
+int ha_partition::reset(void)
+{
+  int result= 0, tmp;
+  handler **file;
+  DBUG_ENTER("ha_partition::reset");
+  if (m_part_info)
+    bitmap_set_all(&m_part_info->used_partitions);
+  file= m_file;
+  do
+  {
+    if ((tmp= (*file)->reset()))
+      result= tmp;
+  } while (*(++file));
+  DBUG_RETURN(result);
+}
+
+/*
+  Special extra method for HA_EXTRA_CACHE with cachesize as extra parameter
+
+  SYNOPSIS
+    extra_opt()
+    operation              Must be HA_EXTRA_CACHE
+    cachesize              Size of cache in
full table scan + + RETURN VALUE + >0 Error code + 0 Success +*/ + +int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize) +{ + DBUG_ENTER("ha_partition::extra_opt()"); + + DBUG_ASSERT(HA_EXTRA_CACHE == operation); + prepare_extra_cache(cachesize); + DBUG_RETURN(0); +} + + +/* + Call extra on handler with HA_EXTRA_CACHE and cachesize + + SYNOPSIS + prepare_extra_cache() + cachesize Size of cache for full table scan + + RETURN VALUE + NONE +*/ + +void ha_partition::prepare_extra_cache(uint cachesize) +{ + DBUG_ENTER("ha_partition::prepare_extra_cache()"); + + m_extra_cache= TRUE; + m_extra_cache_size= cachesize; + if (m_part_spec.start_part != NO_CURRENT_PART_ID) + { + late_extra_cache(m_part_spec.start_part); + } + DBUG_VOID_RETURN; +} + + +/* + Call extra on all partitions + + SYNOPSIS + loop_extra() + operation extra operation type + + RETURN VALUE + >0 Error code + 0 Success +*/ + +int ha_partition::loop_extra(enum ha_extra_function operation) +{ + int result= 0, tmp; + handler **file; + DBUG_ENTER("ha_partition::loop_extra()"); + /* + TODO, 5.2: this is where you could possibly add optimisations to add the bitmap + _if_ a SELECT. + */ + for (file= m_file; *file; file++) + { + if ((tmp= (*file)->extra(operation))) + result= tmp; + } + DBUG_RETURN(result); +} + + +/* + Call extra(HA_EXTRA_CACHE) on next partition_id + + SYNOPSIS + late_extra_cache() + partition_id Partition id to call extra on + + RETURN VALUE + NONE +*/ + +void ha_partition::late_extra_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_cache"); + + if (!m_extra_cache) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + if (m_extra_cache_size == 0) + VOID(file->extra(HA_EXTRA_CACHE)); + else + VOID(file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size)); + DBUG_VOID_RETURN; +} + + +/* + Call extra(HA_EXTRA_NO_CACHE) on next partition_id + + SYNOPSIS + late_extra_no_cache() + partition_id Partition id to call extra on + + RETURN VALUE + NONE +*/ + +void ha_partition::late_extra_no_cache(uint partition_id) +{ + handler *file; + DBUG_ENTER("ha_partition::late_extra_no_cache"); + + if (!m_extra_cache) + DBUG_VOID_RETURN; + file= m_file[partition_id]; + VOID(file->extra(HA_EXTRA_NO_CACHE)); + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + MODULE optimiser support +****************************************************************************/ + +/* + Get keys to use for scanning + + SYNOPSIS + keys_to_use_for_scanning() + + RETURN VALUE + key_map of keys usable for scanning +*/ + +const key_map *ha_partition::keys_to_use_for_scanning() +{ + DBUG_ENTER("ha_partition::keys_to_use_for_scanning"); + + DBUG_RETURN(m_file[0]->keys_to_use_for_scanning()); +} + + +/* + Return time for a scan of the table + + SYNOPSIS + scan_time() + + RETURN VALUE + time for scan +*/ + +double ha_partition::scan_time() +{ + double scan_time= 0; + handler **file; + DBUG_ENTER("ha_partition::scan_time"); + + for (file= m_file; *file; file++) + if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file))) + scan_time+= (*file)->scan_time(); + DBUG_RETURN(scan_time); +} + + +/* + Get time to read + + SYNOPSIS + read_time() + index Index number used + ranges Number of ranges + rows Number of rows + + RETURN VALUE + time for read + + DESCRIPTION + This will be optimised later to include whether or not the index can + be used with partitioning. 
+    To achieve this we need to add another parameter that specifies how
+    many of the index fields that are bound in the ranges. Possibly added
+    as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+  DBUG_ENTER("ha_partition::read_time");
+
+  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+}
+
+/*
+  Find number of records in a range
+
+  SYNOPSIS
+    records_in_range()
+    inx                  Index number
+    min_key              Start of range
+    max_key              End of range
+
+  RETURN VALUE
+    Number of rows in range
+
+  DESCRIPTION
+    Given a starting key and an ending key, estimate the number of rows
+    that will exist between the two. max_key may be empty, in which case
+    we determine if min_key matches any rows.
+
+    Called from opt_range.cc by check_quick_keys().
+
+    monty: MUST be called for each range and added.
+           Note that MySQL will assume that if this returns 0 there are no
+           matching rows for the range!
+*/
+
+ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
+                                       key_range *max_key)
+{
+  handler **file;
+  ha_rows in_range= 0;
+  DBUG_ENTER("ha_partition::records_in_range");
+
+  file= m_file;
+  do
+  {
+    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
+    {
+      ha_rows tmp_in_range= (*file)->records_in_range(inx, min_key, max_key);
+      if (tmp_in_range == HA_POS_ERROR)
+        DBUG_RETURN(tmp_in_range);
+      in_range+= tmp_in_range;
+    }
+  } while (*(++file));
+  DBUG_RETURN(in_range);
+}
+
+
+/*
+  Estimate upper bound of number of rows
+
+  SYNOPSIS
+    estimate_rows_upper_bound()
+
+  RETURN VALUE
+    Number of rows
+*/
+
+ha_rows ha_partition::estimate_rows_upper_bound()
+{
+  ha_rows rows, tot_rows= 0;
+  handler **file;
+  DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
+
+  file= m_file;
+  do
+  {
+    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
+    {
+      rows= (*file)->estimate_rows_upper_bound();
+      if (rows == HA_POS_ERROR)
+        DBUG_RETURN(HA_POS_ERROR);
+      tot_rows+= rows;
+    }
+  } while (*(++file));
+  DBUG_RETURN(tot_rows);
+}
+
+
+/*
+  Is it ok to switch to a new engine for this table
+
+  SYNOPSIS
+    can_switch_engines()
+
+  RETURN VALUE
+    TRUE                  Ok
+    FALSE                 Not ok
+
+  DESCRIPTION
+    Used to ensure that tables with foreign key constraints are not moved
+    to engines without foreign key support.
+*/
+
+bool ha_partition::can_switch_engines()
+{
+  handler **file;
+  DBUG_ENTER("ha_partition::can_switch_engines");
+
+  file= m_file;
+  do
+  {
+    if (!(*file)->can_switch_engines())
+      DBUG_RETURN(FALSE);
+  } while (*(++file));
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Is table cache supported
+
+  SYNOPSIS
+    table_cache_type()
+
+  RETURN VALUE
+    Table cache type of the first underlying handler
+*/
+
+uint8 ha_partition::table_cache_type()
+{
+  DBUG_ENTER("ha_partition::table_cache_type");
+
+  DBUG_RETURN(m_file[0]->table_cache_type());
+}
+
+
+/****************************************************************************
+                MODULE print messages
+****************************************************************************/
+
+const char *ha_partition::index_type(uint inx)
+{
+  DBUG_ENTER("ha_partition::index_type");
+
+  DBUG_RETURN(m_file[0]->index_type(inx));
+}
+
+
+void ha_partition::print_error(int error, myf errflag)
+{
+  DBUG_ENTER("ha_partition::print_error");
+
+  /* Should probably look for my own errors first */
+  /* monty: needs to be called for the last used partition ! */
+  DBUG_PRINT("enter", ("error = %d", error));
+
+  if (error == HA_ERR_NO_PARTITION_FOUND)
+  {
+    char buf[100];
+    my_error(ER_NO_PARTITION_FOR_GIVEN_VALUE, MYF(0),
+             llstr(m_part_info->part_expr->val_int(), buf));
+  }
+  else
+    m_file[0]->print_error(error, errflag);
+  DBUG_VOID_RETURN;
+}
+
+
+bool ha_partition::get_error_message(int error, String *buf)
+{
+  DBUG_ENTER("ha_partition::get_error_message");
+
+  /* Should probably look for my own errors first */
+  /* monty: needs to be called for the last used partition ! */
+  DBUG_RETURN(m_file[0]->get_error_message(error, buf));
+}
+
+
+/****************************************************************************
+                MODULE handler characteristics
+****************************************************************************/
+/*
+  If frm_error() is called then we will use this to find out what file
+  extensions exist for the storage engine. This is also used by the default
+  rename_table and delete_table methods in handler.cc.
+*/
+
+static const char *ha_partition_ext[]=
+{
+  ha_par_ext, NullS
+};
+
+const char **ha_partition::bas_ext() const
+{ return ha_partition_ext; }
+
+
+uint ha_partition::min_of_the_max_uint(
+                       uint (handler::*operator_func)(void) const) const
+{
+  handler **file;
+  uint min_of_the_max= ((*m_file)->*operator_func)();
+
+  for (file= m_file+1; *file; file++)
+  {
+    uint tmp= ((*file)->*operator_func)();
+    set_if_smaller(min_of_the_max, tmp);
+  }
+  return min_of_the_max;
+}
+
+
+uint ha_partition::max_supported_key_parts() const
+{
+  return min_of_the_max_uint(&handler::max_supported_key_parts);
+}
+
+
+uint ha_partition::max_supported_key_length() const
+{
+  return min_of_the_max_uint(&handler::max_supported_key_length);
+}
+
+
+uint ha_partition::max_supported_key_part_length() const
+{
+  return min_of_the_max_uint(&handler::max_supported_key_part_length);
+}
+
+
+uint ha_partition::max_supported_record_length() const
+{
+  return min_of_the_max_uint(&handler::max_supported_record_length);
+}
+
+
+uint ha_partition::max_supported_keys() const
+{
+  return min_of_the_max_uint(&handler::max_supported_keys);
+}
+
+
+uint ha_partition::extra_rec_buf_length() const
+{
+  handler **file;
+  uint max= (*m_file)->extra_rec_buf_length();
+
+  for (file= m_file + 1; *file; file++)
+    if (max < (*file)->extra_rec_buf_length())
+      max= (*file)->extra_rec_buf_length();
+  return max;
+}
+
+
+uint ha_partition::min_record_length(uint options) const
+{
+  handler **file;
+  uint max= (*m_file)->min_record_length(options);
+
+  for (file= m_file + 1; *file; file++)
+    if (max < (*file)->min_record_length(options))
+      max= (*file)->min_record_length(options);
+  return max;
+}
+
+
+/****************************************************************************
+                MODULE compare records
+****************************************************************************/
+/*
+  Compare two positions
+
+  SYNOPSIS
+    cmp_ref()
+    ref1                  First position
+    ref2                  Second position
+
+  RETURN VALUE
+    <0                    ref1 < ref2
+    0                     Equal
+    >0                    ref1 > ref2
+
+  DESCRIPTION
+    We get two references and need to check if those records are the same.
+    If they belong to different partitions we decide that they are not
+    the same record. Otherwise we use the particular handler to decide if
+    they are the same. Sort in partition id order if not equal.
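+
+    Sketch of the position format assumed by this method (see
+    PARTITION_BYTES_IN_POS in ha_partition.h):
+      byte 0-1    partition id, stored low byte first (read with uint2korr)
+      byte 2...   position as produced by the underlying handler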
+*/ + +int ha_partition::cmp_ref(const byte *ref1, const byte *ref2) +{ + uint part_id; + my_ptrdiff_t diff1, diff2; + handler *file; + DBUG_ENTER("ha_partition::cmp_ref"); + + if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1])) + { + part_id= uint2korr(ref1); + file= m_file[part_id]; + DBUG_ASSERT(part_id < m_tot_parts); + DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), + (ref2 + PARTITION_BYTES_IN_POS))); + } + diff1= ref2[1] - ref1[1]; + diff2= ref2[0] - ref1[0]; + if (diff1 > 0) + { + DBUG_RETURN(-1); + } + if (diff1 < 0) + { + DBUG_RETURN(+1); + } + if (diff2 > 0) + { + DBUG_RETURN(-1); + } + DBUG_RETURN(+1); +} + + +/**************************************************************************** + MODULE auto increment +****************************************************************************/ + +void ha_partition::restore_auto_increment() +{ + DBUG_ENTER("ha_partition::restore_auto_increment"); + + DBUG_VOID_RETURN; +} + + +/* + This method is called by update_auto_increment which in turn is called + by the individual handlers as part of write_row. We will always let + the first handler keep track of the auto increment value for all + partitions. +*/ + +ulonglong ha_partition::get_auto_increment() +{ + DBUG_ENTER("ha_partition::get_auto_increment"); + + DBUG_RETURN(m_file[0]->get_auto_increment()); +} + + +/**************************************************************************** + MODULE initialise handler for HANDLER call +****************************************************************************/ + +void ha_partition::init_table_handle_for_HANDLER() +{ + return; +} + + +/**************************************************************************** + MODULE Partition Share +****************************************************************************/ +/* + Service routines for ... methods. +------------------------------------------------------------------------- + Variables for partition share methods. A hash used to track open tables. + A mutex for the hash table and an init variable to check if hash table + is initialised. + There is also a constant ending of the partition handler file name. +*/ + +#ifdef NOT_USED +static HASH partition_open_tables; +static pthread_mutex_t partition_mutex; +static int partition_init= 0; + + +/* + Function we use in the creation of our hash to get key. +*/ + +static byte *partition_get_key(PARTITION_SHARE *share, uint *length, + my_bool not_used __attribute__ ((unused))) +{ + *length= share->table_name_length; + return (byte *) share->table_name; +} + +/* + Example of simple lock controls. The "share" it creates is structure we + will pass to each partition handler. Do you have to have one of these? + Well, you have pieces that are used for locking, and they are needed to + function. +*/ + +static PARTITION_SHARE *get_share(const char *table_name, TABLE *table) +{ + PARTITION_SHARE *share; + uint length; + char *tmp_name; + + /* + So why does this exist? There is no way currently to init a storage + engine. + Innodb and BDB both have modifications to the server to allow them to + do this. Since you will not want to do this, this is probably the next + best method. 
+ */ + if (!partition_init) + { + /* Hijack a mutex for init'ing the storage engine */ + pthread_mutex_lock(&LOCK_mysql_create_db); + if (!partition_init) + { + partition_init++; + VOID(pthread_mutex_init(&partition_mutex, MY_MUTEX_INIT_FAST)); + (void) hash_init(&partition_open_tables, system_charset_info, 32, 0, 0, + (hash_get_key) partition_get_key, 0, 0); + } + pthread_mutex_unlock(&LOCK_mysql_create_db); + } + pthread_mutex_lock(&partition_mutex); + length= (uint) strlen(table_name); + + if (!(share= (PARTITION_SHARE *) hash_search(&partition_open_tables, + (byte *) table_name, length))) + { + if (!(share= (PARTITION_SHARE *) + my_multi_malloc(MYF(MY_WME | MY_ZEROFILL), + &share, sizeof(*share), + &tmp_name, length + 1, NullS))) + { + pthread_mutex_unlock(&partition_mutex); + return NULL; + } + + share->use_count= 0; + share->table_name_length= length; + share->table_name= tmp_name; + strmov(share->table_name, table_name); + if (my_hash_insert(&partition_open_tables, (byte *) share)) + goto error; + thr_lock_init(&share->lock); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + } + share->use_count++; + pthread_mutex_unlock(&partition_mutex); + + return share; + +error: + pthread_mutex_unlock(&partition_mutex); + my_free((gptr) share, MYF(0)); + + return NULL; +} + + +/* + Free lock controls. We call this whenever we close a table. If the table + had the last reference to the share then we free memory associated with + it. +*/ + +static int free_share(PARTITION_SHARE *share) +{ + pthread_mutex_lock(&partition_mutex); + if (!--share->use_count) + { + hash_delete(&partition_open_tables, (byte *) share); + thr_lock_delete(&share->lock); + pthread_mutex_destroy(&share->mutex); + my_free((gptr) share, MYF(0)); + } + pthread_mutex_unlock(&partition_mutex); + + return 0; +} +#endif /* NOT_USED */ diff --git a/sql/ha_partition.h b/sql/ha_partition.h new file mode 100644 index 00000000000..40306ba7da8 --- /dev/null +++ b/sql/ha_partition.h @@ -0,0 +1,958 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +enum partition_keywords +{ + PKW_HASH= 0, PKW_RANGE, PKW_LIST, PKW_KEY, PKW_MAXVALUE, PKW_LINEAR +}; + +/* + PARTITION_SHARE is a structure that will be shared amoung all open handlers + The partition implements the minimum of what you will probably need. 
+*/ + +typedef struct st_partition_share +{ + char *table_name; + uint table_name_length, use_count; + pthread_mutex_t mutex; + THR_LOCK lock; +} PARTITION_SHARE; + + +#define PARTITION_BYTES_IN_POS 2 +class ha_partition :public handler +{ +private: + enum partition_index_scan_type + { + partition_index_read= 0, + partition_index_first= 1, + partition_index_last= 2, + partition_no_index_scan= 3 + }; + /* Data for the partition handler */ + int m_mode; // Open mode + uint m_open_test_lock; // Open test_if_locked + char *m_file_buffer; // Buffer with names + char *m_name_buffer_ptr; // Pointer to first partition name + handlerton **m_engine_array; // Array of types of the handlers + handler **m_file; // Array of references to handler inst. + handler **m_new_file; // Array of references to new handlers + handler **m_reorged_file; // Reorganised partitions + handler **m_added_file; // Added parts kept for errors + partition_info *m_part_info; // local reference to partition + byte *m_start_key_ref; // Reference of start key in current + // index scan info + Field **m_part_field_array; // Part field array locally to save acc + byte *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan + KEY *m_curr_key_info; // Current index + byte *m_rec0; // table->record[0] + QUEUE m_queue; // Prio queue used by sorted read + /* + Since the partition handler is a handler on top of other handlers, it + is necessary to keep information about what the underlying handler + characteristics is. It is not possible to keep any handler instances + for this since the MySQL Server sometimes allocating the handler object + without freeing them. + */ + u_long m_table_flags; + u_long m_low_byte_first; + + uint m_reorged_parts; // Number of reorganised parts + uint m_tot_parts; // Total number of partitions; + uint m_no_locks; // For engines like ha_blackhole, which needs no locks + uint m_last_part; // Last file that we update,write + int m_lock_type; // Remembers type of last + // external_lock + part_id_range m_part_spec; // Which parts to scan + uint m_scan_value; // Value passed in rnd_init + // call + uint m_ref_length; // Length of position in this + // handler object + key_range m_start_key; // index read key range + enum partition_index_scan_type m_index_scan_type;// What type of index + // scan + uint m_top_entry; // Which partition is to + // deliver next result + uint m_rec_length; // Local copy of record length + + bool m_ordered; // Ordered/Unordered index scan + bool m_has_transactions; // Can we support transactions + bool m_pkey_is_clustered; // Is primary key clustered + bool m_create_handler; // Handler used to create table + bool m_is_sub_partitioned; // Is subpartitioned + bool m_ordered_scan_ongoing; + + /* + We keep track if all underlying handlers are MyISAM since MyISAM has a + great number of extra flags not needed by other handlers. + */ + bool m_myisam; // Are all underlying handlers + // MyISAM + /* + We keep track of InnoDB handlers below since it requires proper setting + of query_id in fields at index_init and index_read calls. + */ + bool m_innodb; // Are all underlying handlers + // InnoDB + /* + When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying + handlers immediately. Instead we cache it and call the underlying + immediately before starting the scan on the partition. This is to + prevent allocating a READ CACHE for each partition in parallel when + performing a full table scan on MyISAM partitioned table. + This state is cleared by extra(HA_EXTRA_NO_CACHE). 
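+
+    Illustrative sequence (a sketch, assuming a full table scan over two
+    MyISAM partitions p0 and p1):
+      extra(HA_EXTRA_CACHE)      -> only recorded in m_extra_cache
+      scan starts on p0          -> late_extra_cache(0)
+      scan moves on to p1        -> late_extra_no_cache(0), late_extra_cache(1)
+      extra(HA_EXTRA_NO_CACHE)   -> passed to all underlying handlers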
+  */
+  bool m_extra_cache;
+  uint m_extra_cache_size;
+
+  void init_handler_variables();
+  /*
+    Variables for lock structures.
+  */
+  THR_LOCK_DATA lock;                   /* MySQL lock */
+  PARTITION_SHARE *share;               /* Shared lock info */
+
+public:
+  virtual void set_part_info(partition_info *part_info)
+  {
+    m_part_info= part_info;
+    m_is_sub_partitioned= part_info->is_sub_partitioned();
+  }
+  /*
+    -------------------------------------------------------------------------
+    MODULE create/delete handler object
+    -------------------------------------------------------------------------
+    Object create/delete methods. The normal one is called when a table
+    object exists. There is also a method to create the handler object with
+    only partition information. This is used from mysql_create_table when
+    the table is to be created and the engine type is deduced to be the
+    partition handler.
+    -------------------------------------------------------------------------
+  */
+  ha_partition(TABLE_SHARE * table);
+  ha_partition(partition_info * part_info);
+  ~ha_partition();
+  /*
+    A partition handler has no characteristics in itself. It only inherits
+    those from the underlying handlers. Here we set up those constants to
+    enable later calls of the methods to retrieve constants from the under-
+    lying handlers. Returns false if not successful.
+  */
+  int ha_initialise();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE meta data changes
+    -------------------------------------------------------------------------
+    Meta data routines to CREATE, DROP, RENAME table, often also used at
+    ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
+
+    update_table_comment is used in SHOW TABLE commands to provide a
+    chance for the handler to add any interesting comments to the table
+    comments not provided by the user's comment.
+
+    create_handler_files is called before opening a new handler object
+    with openfrm to call create. It is used to create any local handler
+    object needed in opening the object in openfrm
+    -------------------------------------------------------------------------
+  */
+  virtual int delete_table(const char *from);
+  virtual int rename_table(const char *from, const char *to);
+  virtual int create(const char *name, TABLE *form,
+                     HA_CREATE_INFO *create_info);
+  virtual int create_handler_files(const char *name);
+  virtual void update_create_info(HA_CREATE_INFO *create_info);
+  virtual char *update_table_comment(const char *comment);
+  virtual int change_partitions(HA_CREATE_INFO *create_info,
+                                const char *path,
+                                ulonglong *copied,
+                                ulonglong *deleted,
+                                const void *pack_frm_data,
+                                uint pack_frm_len);
+  virtual int drop_partitions(const char *path);
+  virtual int rename_partitions(const char *path);
+  bool get_no_parts(const char *name, uint *no_parts)
+  {
+    DBUG_ENTER("ha_partition::get_no_parts");
+    *no_parts= m_tot_parts;
+    DBUG_RETURN(0);
+  }
+private:
+  int copy_partitions(ulonglong *copied, ulonglong *deleted);
+  void cleanup_new_partition(uint part_count);
+  int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info,
+                            handler *file, const char *part_name);
+  /*
+    delete_table, rename_table and create use very similar logic which
+    is packed into this routine.
+  */
+  uint del_ren_cre_table(const char *from,
+                         const char *to= NULL,
+                         TABLE *table_arg= NULL,
+                         HA_CREATE_INFO *create_info= NULL);
+  /*
+    One method to create the table_name.par file containing the names of the
+    underlying partitions, their engine and the number of partitions.
+    And one method to read it in.
+  */
+  bool create_handler_file(const char *name);
+  bool get_from_handler_file(const char *name);
+  bool new_handlers_from_part_info();
+  bool create_handlers();
+  void clear_handler_file();
+  void set_up_table_before_create(TABLE *table_arg,
+                                  const char *partition_name_with_path,
+                                  HA_CREATE_INFO *info,
+                                  uint part_id);
+  partition_element *find_partition_element(uint part_id);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE open/close object
+    -------------------------------------------------------------------------
+    Open and close handler object to ensure all underlying files and
+    objects allocated and deallocated for query handling are handled
+    properly.
+    -------------------------------------------------------------------------
+
+    A handler object is opened as part of its initialisation and before
+    being used for normal queries (but not always before meta-data changes).
+    If the object was opened it will also be closed before being deleted.
+  */
+  virtual int open(const char *name, int mode, uint test_if_locked);
+  virtual int close(void);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE start/end statement
+    -------------------------------------------------------------------------
+    This module contains methods that are used to understand start/end of
+    statements, transaction boundaries, and aid for proper concurrency
+    control.
+    The partition handler need not implement abort and commit since this
+    will be handled by any underlying handlers implementing transactions.
+    There is only one call to each handler type involved per transaction
+    and these go directly to the handlers supporting transactions
+    (currently InnoDB, BDB and NDB).
+    -------------------------------------------------------------------------
+  */
+  virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
+                                     enum thr_lock_type lock_type);
+  virtual int external_lock(THD * thd, int lock_type);
+  /*
+    When table is locked a statement is started by calling start_stmt
+    instead of external_lock
+  */
+  virtual int start_stmt(THD * thd, thr_lock_type lock_type);
+  /*
+    Lock count is the number of locked underlying handlers (I assume)
+  */
+  virtual uint lock_count(void) const;
+  /*
+    Call to unlock rows not to be updated in transaction
+  */
+  virtual void unlock_row();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE change record
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to change the records
+    after INSERT, DELETE, UPDATE, REPLACE method calls but also other
+    special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE.
+    -------------------------------------------------------------------------
+
+    These methods are used for insert (write_row), update (update_row)
+    and delete (delete_row). All methods to change data always work on
+    one row at a time. update_row and delete_row also contain the old
+    row.
+    delete_all_rows will delete all rows in the table in one call as a
+    special optimisation for DELETE from table;
+
+    Bulk inserts are supported if all underlying handlers support it.
+    start_bulk_insert and end_bulk_insert are called before and after a
+    number of calls to write_row.
+    Not yet though.
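+
+    A rough sketch of the dispatch write_row performs (simplified from the
+    implementation in ha_partition.cc; the partition id helper shown here
+    is hypothetical and error handling is omitted):
+
+      uint32 part_id= get_partition_id_for_row(buf);   // hypothetical name
+      return m_file[part_id]->write_row(buf);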
+  */
+  virtual int write_row(byte * buf);
+  virtual int update_row(const byte * old_data, byte * new_data);
+  virtual int delete_row(const byte * buf);
+  virtual int delete_all_rows(void);
+  virtual void start_bulk_insert(ha_rows rows);
+  virtual int end_bulk_insert();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE full table scan
+    -------------------------------------------------------------------------
+    This module is used for the most basic access method for any table
+    handler. This is to fetch all data through a full table scan. No
+    indexes are needed to implement this part.
+    It contains one method to start the scan (rnd_init) that can also be
+    called multiple times (typical in a nested loop join). Then proceeding
+    to the next record (rnd_next) and closing the scan (rnd_end).
+    To remember a record for later access there is a method (position)
+    and there is a method used to retrieve the record based on the stored
+    position.
+    The position can be a file position, a primary key, a ROWID dependent
+    on the handler below.
+    -------------------------------------------------------------------------
+  */
+  /*
+    unlike index_init(), rnd_init() can be called two times
+    without rnd_end() in between (it only makes sense if scan=1).
+    Then the second call should prepare for the new table scan
+    (e.g. if rnd_init allocates the cursor, the second call should
+    position it to the start of the table; no need to deallocate
+    and allocate it again)
+  */
+  virtual int rnd_init(bool scan);
+  virtual int rnd_end();
+  virtual int rnd_next(byte * buf);
+  virtual int rnd_pos(byte * buf, byte * pos);
+  virtual void position(const byte * record);
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE index scan
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to perform access through
+    indexes. The interface is defined as a scan interface but the handler
+    can also use key lookup if the index is a unique index or a primary
+    key index.
+    Index scans are mostly useful for SELECT queries but are an important
+    part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
+    and so forth.
+    Naturally an index is needed for an index scan and indexes can be
+    either ordered or hash based. Some ordered indexes can return data in
+    order but not necessarily all of them.
+    There are many flags that define the behavior of indexes in the
+    various handlers. These methods are found in the optimizer module.
+    -------------------------------------------------------------------------
+
+    index_read is called to start a scan of an index. The find_flag defines
+    the semantics of the scan. These flags are defined in
+    include/my_base.h
+    index_read_idx is the same but also initializes the index before doing
+    the same thing as index_read. Thus it is similar to index_init followed
+    by index_read. This is also how we implement it.
+
+    index_read/index_read_idx also return the first row. Thus for
+    key lookups, the index_read will be the only call to the handler in
+    the index scan.
+
+    index_init initializes an index before using it and index_end does
+    any end processing needed.
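+
+    Typical call sequence from the server side (a sketch, assuming an
+    ordinary ascending key lookup followed by a scan):
+
+      h->index_init(idx, TRUE /* sorted */);
+      error= h->index_read(buf, key, key_len, HA_READ_KEY_EXACT);
+      while (!error)
+        error= h->index_next(buf);
+      h->index_end();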
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE index scan
+    -------------------------------------------------------------------------
+    This part of the handler interface is used to perform access through
+    indexes. The interface is defined as a scan interface, but the handler
+    can also use key lookup if the index is a unique index or a primary
+    key index.
+    Index scans are mostly useful for SELECT queries but are also an
+    important part of UPDATE, DELETE, REPLACE and CREATE TABLE ... AS SELECT
+    and so forth.
+    Naturally an index is needed for an index scan, and indexes can be
+    either ordered or hash based. Some ordered indexes can return data in
+    order, but not necessarily all of them.
+    There are many flags that define the behaviour of indexes in the
+    various handlers. These flags are interpreted by the optimizer module.
+    -------------------------------------------------------------------------
+
+    index_read is called to start a scan of an index. The find_flag defines
+    the semantics of the scan. These flags are defined in
+    include/my_base.h
+    index_read_idx is the same, but it also initializes the index before
+    the read. Thus it is similar to index_init followed by index_read, and
+    this is also how we implement it.
+
+    index_read/index_read_idx also return the first row. Thus for
+    key lookups, the index_read will be the only call to the handler in
+    the index scan.
+
+    index_init initializes an index before using it and index_end does
+    any end processing needed.
+  */
+  virtual int index_read(byte * buf, const byte * key,
+                         uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_read_idx(byte * buf, uint idx, const byte * key,
+                             uint key_len, enum ha_rkey_function find_flag);
+  virtual int index_init(uint idx, bool sorted);
+  virtual int index_end();
+
+  /*
+    These methods are used to jump to the next or previous entry in the
+    index scan. There are also methods to jump to the first and last entry.
+  */
+  virtual int index_next(byte * buf);
+  virtual int index_prev(byte * buf);
+  virtual int index_first(byte * buf);
+  virtual int index_last(byte * buf);
+  virtual int index_next_same(byte * buf, const byte * key, uint keylen);
+  virtual int index_read_last(byte * buf, const byte * key, uint keylen);
+
+  /*
+    read_first_row is a virtual method, but it is only implemented by
+    handler.cc; no storage engine has implemented it, so neither will
+    the partition handler.
+
+    virtual int read_first_row(byte *buf, uint primary_key);
+  */
+
+  /*
+    We don't implement multi read range yet, will do later.
+    virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
+                                       KEY_MULTI_RANGE *ranges,
+                                       uint range_count,
+                                       bool sorted, HANDLER_BUFFER *buffer);
+    virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+  */
+
+
+  virtual int read_range_first(const key_range * start_key,
+                               const key_range * end_key,
+                               bool eq_range, bool sorted);
+  virtual int read_range_next();
+
+private:
+  int common_index_read(byte * buf, const byte * key,
+                        uint key_len, enum ha_rkey_function find_flag);
+  int common_first_last(byte * buf);
+  int partition_scan_set_up(byte * buf, bool idx_read_flag);
+  int handle_unordered_next(byte * buf, bool next_same);
+  int handle_unordered_scan_next_partition(byte * buf);
+  byte *queue_buf(uint part_id)
+  {
+    return (m_ordered_rec_buffer +
+            (part_id * (m_rec_length + PARTITION_BYTES_IN_POS)));
+  }
+  byte *rec_buf(uint part_id)
+  {
+    return (queue_buf(part_id) +
+            PARTITION_BYTES_IN_POS);
+  }
+  int handle_ordered_index_scan(byte * buf);
+  int handle_ordered_next(byte * buf, bool next_same);
+  int handle_ordered_prev(byte * buf);
+  void return_top_record(byte * buf);
+  void include_partition_fields_in_used_fields();
+public:
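/*
  Illustrative sketch (editor's addition, not part of this patch): an
  ordered index scan has to merge the per-partition ordered streams, which
  is what the queue of record buffers behind handle_ordered_index_scan()
  amounts to.  The classic structure is a k-way merge on a priority queue;
  rows are plain ints here for brevity.
*/
#include <functional>
#include <queue>
#include <utility>
#include <vector>

struct merge_source { std::vector<int> rows; size_t pos; };

std::vector<int> ordered_merge(std::vector<merge_source> &src)
{
  typedef std::pair<int, size_t> entry;    // (current key, source index)
  std::priority_queue<entry, std::vector<entry>,
                      std::greater<entry> > q;
  for (size_t i= 0; i < src.size(); i++)   // prime with each source's head
  {
    src[i].pos= 0;
    if (!src[i].rows.empty())
      q.push(entry(src[i].rows[0], i));
  }
  std::vector<int> out;
  while (!q.empty())
  {
    entry top= q.top(); q.pop();
    out.push_back(top.first);              // emit the smallest head row
    merge_source &s= src[top.second];
    if (++s.pos < s.rows.size())           // refill from the same source
      q.push(entry(s.rows[s.pos], top.second));
  }
  return out;                              // globally ordered result
}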
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE information calls
+    -------------------------------------------------------------------------
+    These calls are used to inform the handler of specifics of the ongoing
+    scans and other actions. Most of these are used for optimisation
+    purposes.
+    -------------------------------------------------------------------------
+  */
+  virtual void info(uint);
+  void get_dynamic_partition_info(PARTITION_INFO *stat_info,
+                                  uint part_id);
+  virtual int extra(enum ha_extra_function operation);
+  virtual int extra_opt(enum ha_extra_function operation, ulong cachesize);
+  virtual int reset(void);
+
+private:
+  static const uint NO_CURRENT_PART_ID= 0xFFFFFFFF;
+  int loop_extra(enum ha_extra_function operation);
+  void late_extra_cache(uint partition_id);
+  void late_extra_no_cache(uint partition_id);
+  void prepare_extra_cache(uint cachesize);
+public:
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE optimiser support
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    NOTE !!!!!!
+    -------------------------------------------------------------------------
+    -------------------------------------------------------------------------
+    One important part of the public handler interface that is not visible
+    in the methods is the attribute 'records', which is defined in the base
+    class. It is read directly and is set by calling info(HA_STATUS_INFO).
+    -------------------------------------------------------------------------
+  */
+
+  /*
+    keys_to_use_for_scanning can probably be implemented as the
+    intersection of all underlying handlers if mixed handlers are used.
+    This method is used to derive whether an index can be used for
+    index-only scanning when performing an ORDER BY query.
+    Only called from one place in sql_select.cc
+  */
+  virtual const key_map *keys_to_use_for_scanning();
+
+  /*
+    Called in test_quick_select to determine if indexes should be used.
+  */
+  virtual double scan_time();
+
+  /*
+    The next method will never be called if you do not implement indexes.
+  */
+  virtual double read_time(uint index, uint ranges, ha_rows rows);
+  /*
+    Estimates how many records are in the given range. Used by the
+    optimiser to calculate the cost of using a particular index.
+  */
+  virtual ha_rows records_in_range(uint inx, key_range * min_key,
+                                   key_range * max_key);
+
+  /*
+    The upper bound of the number of records returned by a scan is the
+    sum of the upper bounds of all underlying handlers.
+  */
+  virtual ha_rows estimate_rows_upper_bound();
+
+  /*
+    table_cache_type is implemented by the underlying handler but all
+    underlying handlers must have the same implementation for it to work.
+  */
+  virtual uint8 table_cache_type();
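/*
  Illustrative sketch (editor's addition, not part of this patch): most of
  the optimizer statistics a partition handler can offer are plain
  aggregates over the underlying handlers -- for a range estimate, the sum
  of the per-partition estimates, matching the comment on
  estimate_rows_upper_bound above.  Types are stand-ins.
*/
typedef unsigned long long sketch_ha_rows; // stand-in for ha_rows

sketch_ha_rows sum_records_in_range(const sketch_ha_rows *per_part,
                                    unsigned n_parts)
{
  sketch_ha_rows total= 0;
  for (unsigned i= 0; i < n_parts; i++)
    total+= per_part[i];                   // sum, never max: each row lives
  return total;                            // in exactly one partition
}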
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE print messages
+    -------------------------------------------------------------------------
+    This module contains various methods that return text messages for
+    table types, index types and error messages.
+    -------------------------------------------------------------------------
+  */
+  /*
+    The name of the index type that will be used for display.
+    Here we must ensure that all handlers use the same index type
+    for each index created.
+  */
+  virtual const char *index_type(uint inx);
+
+  /* The name of the table type that will be used for display purposes */
+  virtual const char *table_type() const
+  { return "PARTITION"; }
+
+  /*
+    Handler specific error messages
+  */
+  virtual void print_error(int error, myf errflag);
+  virtual bool get_error_message(int error, String * buf);
+  /*
+    -------------------------------------------------------------------------
+    MODULE handler characteristics
+    -------------------------------------------------------------------------
+    This module contains a number of methods defining limitations and
+    characteristics of the handler. The partition handler will calculate
+    these characteristics based on the underlying handlers' characteristics.
+    -------------------------------------------------------------------------
+
+    This is a list of flags that say what the storage engine
+    implements. The current table flags are documented in handler.h
+    The partition handler will support whatever the underlying handlers
+    support, except when specifically mentioned below about exceptions
+    to this rule.
+
+    HA_READ_RND_SAME:
+    Not currently used. (Means that the handler supports the rnd_same() call)
+    (MyISAM, HEAP)
+
+    HA_TABLE_SCAN_ON_INDEX:
+    Used to avoid scanning full tables on an index. If this flag is set then
+    the handler always has a primary key (hidden if not defined) and this
+    index is used for scanning rather than a full table scan in all
+    situations.
+    (InnoDB, BDB, Federated)
+
+    HA_REC_NOT_IN_SEQ:
+    This flag is set for handlers that cannot guarantee that the rows are
+    returned according to incremental positions (0, 1, 2, 3...).
+    This also means that rnd_next() should return HA_ERR_RECORD_DELETED
+    if it finds a deleted row.
+    (MyISAM (not fixed length row), BDB, HEAP, NDB, InnoDB)
+
+    HA_CAN_GEOMETRY:
+    Can the storage engine handle spatial data?
+    Used to check that no spatial attributes are declared unless
+    the storage engine is capable of handling them.
+    (MyISAM)
+
+    HA_FAST_KEY_READ:
+    Setting this flag indicates that the handler is equally fast in
+    finding a row by key as by position.
+    This flag is used in a very special situation in conjunction with
+    filesort. For further explanation see the intro to init_read_record.
+    (BDB, HEAP, InnoDB)
+
+    HA_NULL_IN_KEY:
+    Are NULL values allowed in indexes?
+    If this is not allowed then it is not possible to use an index on a
+    NULLable field.
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_DUPP_POS:
+    Tells that the position of the conflicting duplicate-key record is
+    stored in table->file->dupp_ref. (insert uses rnd_pos() on
+    this to find the duplicated row)
+    (MyISAM)
+
+    HA_CAN_INDEX_BLOBS:
+    Is the storage engine capable of defining an index on a prefix of
+    a BLOB attribute?
+    (BDB, Federated, MyISAM, InnoDB)
+
+    HA_AUTO_PART_KEY:
+    Auto increment fields can be part of a multi-part key. For second part
+    auto-increment keys, the auto_incrementing is done in handler.cc
+    (BDB, Federated, MyISAM, NDB)
+
+    HA_REQUIRE_PRIMARY_KEY:
+    Can't define a table without a primary key (and cannot handle a table
+    with a hidden primary key).
+    (No handler has this limitation currently)
+
+    HA_NOT_EXACT_COUNT:
+    Does the record counter after the info call hold an exact value or
+    not? If it doesn't, this flag is set.
+    Only MyISAM and HEAP use an exact count.
+    (MyISAM, HEAP, BDB, InnoDB, NDB, Federated)
+
+    HA_CAN_INSERT_DELAYED:
+    Can the storage engine support delayed inserts?
+    To start with, the partition handler will not support delayed inserts.
+    Further investigation needed.
+    (HEAP, MyISAM)
+
+    HA_PRIMARY_KEY_IN_READ_INDEX:
+    This parameter is set when the handler will also return the primary key
+    when doing read-only-key on another index.
+
+    HA_NOT_DELETE_WITH_CACHE:
+    Seems to be an old MyISAM feature that is no longer used. No handler
+    has it defined but it is checked in init_read_record.
+    Further investigation needed.
+    (No handler defines it)
+
+    HA_NO_PREFIX_CHAR_KEYS:
+    Indexes on prefixes of character fields are not allowed.
+    (NDB)
+
+    HA_CAN_FULLTEXT:
+    Does the storage engine support fulltext indexes?
+    The partition handler will start by not supporting fulltext indexes.
+    (MyISAM)
+
+    HA_CAN_SQL_HANDLER:
+    Can the HANDLER interface in the MySQL API be used towards this
+    storage engine?
+    (MyISAM, InnoDB)
+
+    HA_NO_AUTO_INCREMENT:
+    Set if the storage engine does not support auto increment fields.
+    (Currently not set by any handler)
+
+    HA_HAS_CHECKSUM:
+    Special MyISAM feature. Has special SQL support in CREATE TABLE.
+    No special handling needed by the partition handler.
+    (MyISAM)
+
+    HA_FILE_BASED:
+    Should file names always be in lower case? (Used by engines
+    that map table names to file names.)
+    Since the partition handler has a local file this flag is set.
+    (BDB, Federated, MyISAM)
+
+    HA_CAN_BIT_FIELD:
+    Is the storage engine capable of handling bit fields?
+    (MyISAM, NDB)
+
+    HA_NEED_READ_RANGE_BUFFER:
+    Is Read Multi-Range supported (=> a multi read range buffer is needed)?
+    This parameter specifies whether a buffer for read multi range
+    is needed by the handler. Whether the handler supports this
+    feature depends on whether the handler implements the
+    read_multi_range* calls or not. The only handler currently
+    supporting this feature is NDB, so the partition handler need
+    not handle this call. There are methods in handler.cc that will
+    transfer those calls into index_read and other calls in the
+    index scan module.
+    (NDB)
+  */
+  virtual ulong table_flags() const
+  { return m_table_flags; }
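/*
  Illustrative sketch (editor's addition, not part of this patch): "support
  whatever the underlying handlers support" naturally means AND-ing the
  per-partition capability bitmaps and then clearing the bits the partition
  layer itself cannot honour yet (fulltext, delayed insert, ...).  The mask
  parameter is hypothetical; the real m_table_flags is computed elsewhere.
*/
unsigned long combine_table_flags(const unsigned long *flags,
                                  unsigned n_parts,
                                  unsigned long unsupported_mask)
{
  unsigned long result= ~0UL;              // start from "everything"
  for (unsigned i= 0; i < n_parts; i++)
    result&= flags[i];                     // keep only common capabilities
  return result & ~unsupported_mask;       // drop what partitioning can't do
}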
+
+  /*
+    This is a bitmap of flags that says how the storage engine
+    implements indexes. The current index flags are documented in
+    handler.h. If you do not implement indexes, just return zero
+    here.
+
+    part is the key part to check. The first key part is 0.
+    If all_parts is set, MySQL wants to know the flags for the combined
+    index up to and including 'part'.
+
+    HA_READ_NEXT:
+    Does the index support read next? This is assumed in the server
+    code and never checked, so all indexes must support this.
+    Note that the handler can be used even if it doesn't have any index.
+    (BDB, HEAP, MyISAM, Federated, NDB, InnoDB)
+
+    HA_READ_PREV:
+    Can the index be used to scan backwards?
+    (BDB, HEAP, MyISAM, NDB, InnoDB)
+
+    HA_READ_ORDER:
+    Can the index deliver its records in index order? Typically true for
+    all ordered indexes and not true for hash indexes.
+    As a first step this is not true for the partition handler, until a
+    merge sort has been implemented in the partition handler.
+    Used to set the keymap part_of_sortkey.
+    This keymap is only used to find indexes usable for resolving an ORDER BY
+    in the query. Thus in most cases index_read will work just fine without
+    order in result production. When this flag is set it is, however, safe to
+    order all output started by index_read since most engines do this. With
+    read_multi_range calls there is a specific flag setting order or not
+    order, so in those cases ordering of index output can be avoided.
+    (BDB, InnoDB, HEAP, MyISAM, NDB)
+
+    HA_READ_RANGE:
+    Specifies whether the index can handle ranges. Typically true for all
+    ordered indexes and not true for hash indexes.
+    Used by the optimiser to check if ranges (such as key >= 5) can be
+    optimised by the index.
+    (BDB, InnoDB, NDB, MyISAM, HEAP)
+
+    HA_ONLY_WHOLE_INDEX:
+    Can't use part key searches. This is typically true for hash indexes
+    and typically not true for ordered indexes.
+    (Federated, NDB, HEAP)
+
+    HA_KEYREAD_ONLY:
+    Does the storage engine support index-only scans on this index?
+    Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD.
+    Used to set the key_map keys_for_keyread and to check in the optimiser
+    for index-only scans. When doing a read under HA_EXTRA_KEYREAD the
+    handler only has to fill in the columns the key covers. If
+    HA_PRIMARY_KEY_IN_READ_INDEX is set then the PRIMARY KEY columns must
+    also be updated in the row.
+    (BDB, InnoDB, MyISAM)
+  */
+  virtual ulong index_flags(uint inx, uint part, bool all_parts) const
+  {
+    return m_file[0]->index_flags(inx, part, all_parts);
+  }
+
+  /*
+    Extensions of table handler files
+  */
+  virtual const char **bas_ext() const;
+  /*
+    unireg.cc will call the following to make sure that the storage engine
+    can handle the data it is about to send.
+
+    The maximum supported values are the minimum over all handlers in
+    the table.
+  */
+  uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
+  virtual uint max_supported_record_length() const;
+  virtual uint max_supported_keys() const;
+  virtual uint max_supported_key_parts() const;
+  virtual uint max_supported_key_length() const;
+  virtual uint max_supported_key_part_length() const;
+
+  /*
+    All handlers in a partitioned table must have the same low_byte_first.
+  */
+  virtual bool low_byte_first() const
+  { return m_low_byte_first; }
+
+  /*
+    The extra record buffer length is the maximum needed by all handlers.
+    The minimum record length is the maximum of all involved handlers.
+  */
+  virtual uint extra_rec_buf_length() const;
+  virtual uint min_record_length(uint options) const;
+
+  /*
+    Transactions on the table are supported if all handlers below support
+    transactions.
+  */
+  virtual bool has_transactions()
+  { return m_has_transactions; }
+
+  /*
+    primary_key_is_clustered can only be true if all underlying handlers
+    have this feature.
+  */
+  virtual bool primary_key_is_clustered()
+  { return m_pkey_is_clustered; }
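/*
  Illustrative sketch (editor's addition, not part of this patch): a
  plausible shape for the min_of_the_max_uint() helper declared above --
  folding a "max supported X" query over all underlying handlers with a
  pointer to member function, so that every max_supported_*() wrapper
  becomes a one-liner.  The base class is a stand-in; at least one
  partition is assumed.
*/
struct limit_handler
{
  virtual unsigned max_supported_keys() const { return 64; }
  virtual ~limit_handler() {}
};

unsigned min_of_the_max(limit_handler *const *file, unsigned n_parts,
                        unsigned (limit_handler::*func)(void) const)
{
  unsigned min_of_max= (file[0]->*func)();
  for (unsigned i= 1; i < n_parts; i++)
  {
    unsigned value= (file[i]->*func)();
    if (value < min_of_max)                // the table limit is the weakest
      min_of_max= value;                   // partition's limit
  }
  return min_of_max;
}

// e.g. min_of_the_max(m_file, m_tot_parts, &limit_handler::max_supported_keys)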
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE compare records
+    -------------------------------------------------------------------------
+    cmp_ref checks if two references are the same. For most handlers this is
+    a simple memcmp of the reference. However, some handlers use a primary
+    key as the reference and this can be the same even if memcmp says the
+    references are different. This is due to character sets, end spaces and
+    so forth.
+    For the partition handler the reference is: first two bytes providing
+    the partition identity of the referred record, then the reference of
+    the underlying handler.
+    Thus cmp_ref for the partition handler always returns FALSE for records
+    not in the same partition and uses cmp_ref on the underlying handler
+    to check whether the rest of the reference part is also the same.
+    -------------------------------------------------------------------------
+  */
+  virtual int cmp_ref(const byte * ref1, const byte * ref2);
+  /*
+    -------------------------------------------------------------------------
+    MODULE auto increment
+    -------------------------------------------------------------------------
+    This module is used to handle the support of auto increments.
+
+    The following variable in the handler is used as part of the handler
+    interface. It is maintained by the parent handler object and should not
+    be touched by child handler objects (see handler.cc for its use):
+
+    auto_increment_column_changed
+    -------------------------------------------------------------------------
+  */
+  virtual void restore_auto_increment();
+  virtual ulonglong get_auto_increment();
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE initialise handler for HANDLER call
+    -------------------------------------------------------------------------
+    This method is a special InnoDB method called before a HANDLER query.
+    -------------------------------------------------------------------------
+  */
+  virtual void init_table_handle_for_HANDLER();
+
+  /*
+    The remainder of this file defines the handler methods not implemented
+    by the partition handler.
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE foreign key support
+    -------------------------------------------------------------------------
+    The following methods are used to implement foreign keys as supported by
+    InnoDB. Implement this??
+    get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual
+    description of how the CREATE TABLE part that defines FOREIGN KEYs is
+    done.
+    free_foreign_key_create_info is used to free the memory area that
+    provided this description.
+    can_switch_engines checks if it is ok to switch to a new engine based on
+    the foreign key info in the table.
+    -------------------------------------------------------------------------
+
+    virtual char* get_foreign_key_create_info()
+    virtual void free_foreign_key_create_info(char* str)
+
+    virtual int get_foreign_key_list(THD *thd,
+                                     List<FOREIGN_KEY_INFO> *f_key_list)
+    virtual uint referenced_by_foreign_key()
+  */
+  virtual bool can_switch_engines();
+  /*
+    -------------------------------------------------------------------------
+    MODULE fulltext index
+    -------------------------------------------------------------------------
+    Fulltext support: not yet.
+    -------------------------------------------------------------------------
+    virtual int ft_init() { return HA_ERR_WRONG_COMMAND; }
+    virtual FT_INFO *ft_init_ext(uint flags,uint inx,const byte *key,
+                                 uint keylen)
+    { return NULL; }
+    virtual int ft_read(byte *buf) { return HA_ERR_WRONG_COMMAND; }
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE restart full table scan at position (MyISAM)
+    -------------------------------------------------------------------------
+    The following method is only used by MyISAM when used as
+    temporary tables in a join:
+    virtual int restart_rnd_next(byte *buf, byte *pos);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE on-line ALTER TABLE
+    -------------------------------------------------------------------------
+    These methods are in the handler interface but are never used (yet).
+    They are to be used by on-line alter table add/drop index:
+    -------------------------------------------------------------------------
+    virtual ulong index_ddl_flags(KEY *wanted_index) const
+    virtual int add_index(TABLE *table_arg,KEY *key_info,uint num_of_keys);
+    virtual int drop_index(TABLE *table_arg,uint *key_num,uint num_of_keys);
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE tablespace support
+    -------------------------------------------------------------------------
+    Admin of table spaces is not applicable to the partition handler
+    (InnoDB). This means that the following method is not implemented:
+    -------------------------------------------------------------------------
+    virtual int discard_or_import_tablespace(my_bool discard)
+  */
+
+  /*
+    -------------------------------------------------------------------------
+    MODULE admin MyISAM
+    -------------------------------------------------------------------------
+
+    -------------------------------------------------------------------------
+    OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are
+    mapped to a routine that handles looping over a given set of
+    partitions; these routines send a flag indicating whether to execute
+    on all partitions.
+    -------------------------------------------------------------------------
+  */
+  virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
+  virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
+  virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
+  virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
+  virtual int optimize_partitions(THD *thd);
+  virtual int analyze_partitions(THD *thd);
+  virtual int check_partitions(THD *thd);
+  virtual int repair_partitions(THD *thd);
+
+ private:
+  int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
+                            uint flags, bool all_parts);
+ public:
+  /*
+    -------------------------------------------------------------------------
+    Admin commands not supported currently (almost purely MyISAM routines).
+    This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+
+    virtual int backup(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
+    virtual int preload_keys(THD *thd, HA_CHECK_OPT *check_opt);
+    virtual bool check_and_repair(THD *thd);
+    virtual int dump(THD* thd, int fd = -1);
+    virtual int net_read_dump(NET* net);
+    virtual uint checksum() const;
+    virtual bool is_crashed() const;
+    virtual bool auto_repair() const;
+
+    -------------------------------------------------------------------------
+    MODULE enable/disable indexes
+    -------------------------------------------------------------------------
+    Enable/Disable Indexes are not supported currently (HEAP, MyISAM).
+    This means that the following methods are not implemented:
+    -------------------------------------------------------------------------
+    virtual int disable_indexes(uint mode);
+    virtual int enable_indexes(uint mode);
+    virtual int indexes_are_disabled(void);
+  */
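/*
  Illustrative sketch (editor's addition, not part of this patch): the
  OPTIMIZE/ANALYZE/CHECK/REPAIR methods above and their *_partitions()
  variants can share a single driver that walks the chosen partitions and
  forwards the call, which is what the (flags, all_parts) signature of
  handle_opt_partitions() suggests.  Types are stand-ins.
*/
struct admin_handler
{
  virtual int admin_op() = 0;              // stand-in for the forwarded call
  virtual ~admin_handler() {}
};

int handle_opt_all(admin_handler *const *file, unsigned n_parts,
                   const bool *part_chosen, bool all_parts)
{
  for (unsigned i= 0; i < n_parts; i++)
  {
    if (!all_parts && !part_chosen[i])     // honour an explicit partition list
      continue;
    int error= file[i]->admin_op();
    if (error)
      return error;                        // stop at the first failure
  }
  return 0;
}

+
+  /*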
------------------------------------------------------------------------- + MODULE append_create_info + ------------------------------------------------------------------------- + append_create_info is only used by MyISAM MERGE tables and the partition + handler will not support this handler as underlying handler. + Implement this?? + ------------------------------------------------------------------------- + virtual void append_create_info(String *packet) + */ +}; diff --git a/sql/handler.cc b/sql/handler.cc index 006a0eb2407..3c79a1af8bd 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -22,109 +22,55 @@ #endif #include "mysql_priv.h" +#include "rpl_filter.h" #include "ha_heap.h" #include "ha_myisam.h" #include "ha_myisammrg.h" -/* - We have dummy hanldertons in case the handler has not been compiled - in. This will be removed in 5.1. -*/ -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -extern handlerton berkeley_hton; -#else -handlerton berkeley_hton = { "BerkeleyDB", SHOW_OPTION_NO, - "Supports transactions and page-level locking", DB_TYPE_BERKELEY_DB, NULL, - 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, HTON_NO_FLAGS }; -#endif -#ifdef HAVE_BLACKHOLE_DB -#include "ha_blackhole.h" -extern handlerton blackhole_hton; -#else -handlerton blackhole_hton = { "BLACKHOLE", SHOW_OPTION_NO, - "/dev/null storage engine (anything you write to it disappears)", - DB_TYPE_BLACKHOLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_EXAMPLE_DB -#include "examples/ha_example.h" -extern handlerton example_hton; -#else -handlerton example_hton = { "EXAMPLE", SHOW_OPTION_NO, - "Example storage engine", - DB_TYPE_EXAMPLE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#if defined(HAVE_ARCHIVE_DB) -#include "ha_archive.h" -extern handlerton archive_hton; -#else -handlerton archive_hton = { "ARCHIVE", SHOW_OPTION_NO, - "Archive storage engine", DB_TYPE_ARCHIVE_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_CSV_DB -#include "examples/ha_tina.h" -extern handlerton tina_hton; -#else -handlerton tina_hton = { "CSV", SHOW_OPTION_NO, "CSV storage engine", - DB_TYPE_CSV_DB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -extern handlerton innobase_hton; -#else -handlerton innobase_hton = { "InnoDB", SHOW_OPTION_NO, - "Supports transactions, row-level locking, and foreign keys", - DB_TYPE_INNODB, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#include <myisampack.h> +#include <errno.h> + +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +#define NDB_MAX_ATTRIBUTES_IN_TABLE 128 #include "ha_ndbcluster.h" -extern handlerton ndbcluster_hton; -#else -handlerton ndbcluster_hton = { "ndbcluster", SHOW_OPTION_NO, - "Clustered, fault-tolerant, memory-based tables", - DB_TYPE_NDBCLUSTER, NULL, 0, 0, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; #endif -#ifdef HAVE_FEDERATED_DB -#include "ha_federated.h" -extern handlerton federated_hton; -#else -handlerton federated_hton = { "FEDERATED", SHOW_OPTION_NO, - "Federated MySQL storage engine", DB_TYPE_FEDERATED_DB, NULL, 0, 0, 
NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - HTON_NO_FLAGS }; +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" #endif -#include <myisampack.h> -#include <errno.h> -extern handlerton myisam_hton; -extern handlerton myisammrg_hton; -extern handlerton heap_hton; -extern handlerton binlog_hton; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +#include "ha_innodb.h" +#endif -/* - Obsolete -*/ -handlerton isam_hton = { "ISAM", SHOW_OPTION_NO, "Obsolete storage engine", - DB_TYPE_ISAM, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, HTON_NO_FLAGS }; +extern handlerton *sys_table_types[]; +#define BITMAP_STACKBUF_SIZE (128/8) /* static functions defined in this file */ +static handler *create_default(TABLE_SHARE *table); + +const handlerton default_hton = +{ + MYSQL_HANDLERTON_INTERFACE_VERSION, + "DEFAULT", + SHOW_OPTION_YES, + NULL, + DB_TYPE_DEFAULT, + NULL, + 0, 0, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, + create_default, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, /* alter_tablespace */ + NULL, /* fill_files_table */ + HTON_NO_FLAGS, /* flags */ + NULL, /* binlog_func */ + NULL /* binlog_log_query */ +}; + static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES; /* number of entries in handlertons[] */ @@ -134,35 +80,14 @@ ulong total_ha_2pc; /* size of savepoint storage area (see ha_init) */ ulong savepoint_alloc_size; -/* - This array is used for processing compiled in engines. -*/ -handlerton *sys_table_types[]= -{ - &myisam_hton, - &heap_hton, - &innobase_hton, - &berkeley_hton, - &blackhole_hton, - &example_hton, - &archive_hton, - &tina_hton, - &ndbcluster_hton, - &federated_hton, - &myisammrg_hton, - &binlog_hton, - &isam_hton, - NULL -}; - struct show_table_alias_st sys_table_aliases[]= { - {"INNOBASE", "InnoDB"}, - {"NDB", "NDBCLUSTER"}, - {"BDB", "BERKELEYDB"}, - {"HEAP", "MEMORY"}, - {"MERGE", "MRG_MYISAM"}, - {NullS, NullS} + {"INNOBASE", DB_TYPE_INNODB}, + {"NDB", DB_TYPE_NDBCLUSTER}, + {"BDB", DB_TYPE_BERKELEY_DB}, + {"HEAP", DB_TYPE_HEAP}, + {"MERGE", DB_TYPE_MRG_MYISAM}, + {NullS, DB_TYPE_UNKNOWN} }; const char *ha_row_type[] = { @@ -178,24 +103,22 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"", static TYPELIB known_extensions= {0,"known_exts", NULL, NULL}; uint known_extensions_id= 0; -enum db_type ha_resolve_by_name(const char *name, uint namelen) +handlerton *ha_resolve_by_name(THD *thd, LEX_STRING *name) { - THD *thd= current_thd; show_table_alias_st *table_alias; - handlerton **types; + st_plugin_int *plugin; if (thd && !my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, + (const uchar *)name->str, name->length, (const uchar *)"DEFAULT", 7)) - return (enum db_type) thd->variables.table_type; + return ha_resolve_by_legacy_type(thd, DB_TYPE_DEFAULT); -retest: - for (types= sys_table_types; *types; types++) + if ((plugin= plugin_lock(name, MYSQL_STORAGE_ENGINE_PLUGIN))) { - if (!my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, - (const uchar *)(*types)->name, strlen((*types)->name))) - return (enum db_type) (*types)->db_type; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (!(hton->flags & HTON_NOT_USER_SELECTABLE)) + return hton; + plugin_unlock(plugin); } /* @@ -204,64 +127,98 @@ retest: for (table_alias= sys_table_aliases; table_alias->type; table_alias++) { if (!my_strnncoll(&my_charset_latin1, - (const uchar *)name, namelen, + (const uchar *)name->str, name->length, (const 
uchar *)table_alias->alias, strlen(table_alias->alias))) - { - name= table_alias->type; - namelen= strlen(name); - goto retest; - } + return ha_resolve_by_legacy_type(thd, table_alias->type); } - return DB_TYPE_UNKNOWN; + return NULL; } -const char *ha_get_storage_engine(enum db_type db_type) +struct plugin_find_dbtype_st { - handlerton **types; - for (types= sys_table_types; *types; types++) + enum legacy_db_type db_type; + handlerton *hton; +}; + + +static my_bool plugin_find_dbtype(THD *unused, st_plugin_int *plugin, + void *arg) +{ + handlerton *types= (handlerton *) plugin->plugin->info; + if (types->db_type == ((struct plugin_find_dbtype_st *)arg)->db_type) { - if (db_type == (*types)->db_type) - return (*types)->name; + ((struct plugin_find_dbtype_st *)arg)->hton= types; + return TRUE; } - return "*NONE*"; + return FALSE; } -bool ha_check_storage_engine_flag(enum db_type db_type, uint32 flag) +const char *ha_get_storage_engine(enum legacy_db_type db_type) { - handlerton **types; - for (types= sys_table_types; *types; types++) + struct plugin_find_dbtype_st info; + + switch (db_type) { - if (db_type == (*types)->db_type) - return test((*types)->flags & flag); + case DB_TYPE_DEFAULT: + return "DEFAULT"; + case DB_TYPE_UNKNOWN: + return "UNKNOWN"; + default: + info.db_type= db_type; + + if (!plugin_foreach(NULL, plugin_find_dbtype, + MYSQL_STORAGE_ENGINE_PLUGIN, &info)) + return "*NONE*"; + + return info.hton->name; } - return FALSE; // No matching engine } -my_bool ha_storage_engine_is_enabled(enum db_type database_type) +static handler *create_default(TABLE_SHARE *table) { - handlerton **types; - for (types= sys_table_types; *types; types++) + handlerton *hton=ha_resolve_by_legacy_type(current_thd, DB_TYPE_DEFAULT); + return (hton && hton != &default_hton && hton->create) ? + hton->create(table) : NULL; +} + + +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) +{ + struct plugin_find_dbtype_st info; + + switch (db_type) { - if ((database_type == (*types)->db_type) && - ((*types)->state == SHOW_OPTION_YES)) - return TRUE; + case DB_TYPE_DEFAULT: + return (thd->variables.table_type != NULL) ? + thd->variables.table_type : + (global_system_variables.table_type != NULL ? 
+ global_system_variables.table_type : &myisam_hton); + case DB_TYPE_UNKNOWN: + return NULL; + default: + info.db_type= db_type; + if (!plugin_foreach(NULL, plugin_find_dbtype, + MYSQL_STORAGE_ENGINE_PLUGIN, &info)) + return NULL; + + return info.hton; } - return FALSE; } /* Use other database handler if databasehandler is not compiled in */ -enum db_type ha_checktype(THD *thd, enum db_type database_type, +handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type, bool no_substitute, bool report_error) { - if (ha_storage_engine_is_enabled(database_type)) - return database_type; + handlerton *hton= ha_resolve_by_legacy_type(thd, database_type); + if (ha_storage_engine_is_enabled(hton)) + return hton; if (no_substitute) { @@ -270,101 +227,81 @@ enum db_type ha_checktype(THD *thd, enum db_type database_type, const char *engine_name= ha_get_storage_engine(database_type); my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name); } - return DB_TYPE_UNKNOWN; + return NULL; } switch (database_type) { #ifndef NO_HASH case DB_TYPE_HASH: - return (database_type); + return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH); #endif case DB_TYPE_MRG_ISAM: - return (DB_TYPE_MRG_MYISAM); + return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM); default: break; } - - return ((enum db_type) thd->variables.table_type != DB_TYPE_UNKNOWN ? - (enum db_type) thd->variables.table_type : - ((enum db_type) global_system_variables.table_type != - DB_TYPE_UNKNOWN ? - (enum db_type) global_system_variables.table_type : DB_TYPE_MYISAM) - ); + + return ha_resolve_by_legacy_type(thd, DB_TYPE_DEFAULT); } /* ha_checktype */ -handler *get_new_handler(TABLE *table, MEM_ROOT *alloc, enum db_type db_type) +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type) { - switch (db_type) { -#ifndef NO_HASH - case DB_TYPE_HASH: - return new (alloc) ha_hash(table); -#endif - case DB_TYPE_MRG_MYISAM: - case DB_TYPE_MRG_ISAM: - return new (alloc) ha_myisammrg(table); -#ifdef HAVE_BERKELEY_DB - case DB_TYPE_BERKELEY_DB: - if (have_berkeley_db == SHOW_OPTION_YES) - return new (alloc) ha_berkeley(table); - return NULL; -#endif -#ifdef HAVE_INNOBASE_DB - case DB_TYPE_INNODB: - if (have_innodb == SHOW_OPTION_YES) - return new (alloc) ha_innobase(table); - return NULL; -#endif -#ifdef HAVE_EXAMPLE_DB - case DB_TYPE_EXAMPLE_DB: - if (have_example_db == SHOW_OPTION_YES) - return new (alloc) ha_example(table); - return NULL; -#endif -#if defined(HAVE_ARCHIVE_DB) - case DB_TYPE_ARCHIVE_DB: - if (have_archive_db == SHOW_OPTION_YES) - return new (alloc) ha_archive(table); - return NULL; -#endif -#ifdef HAVE_BLACKHOLE_DB - case DB_TYPE_BLACKHOLE_DB: - if (have_blackhole_db == SHOW_OPTION_YES) - return new (alloc) ha_blackhole(table); - return NULL; -#endif -#ifdef HAVE_FEDERATED_DB - case DB_TYPE_FEDERATED_DB: - if (have_federated_db == SHOW_OPTION_YES) - return new (alloc) ha_federated(table); - return NULL; -#endif -#ifdef HAVE_CSV_DB - case DB_TYPE_CSV_DB: - if (have_csv_db == SHOW_OPTION_YES) - return new (alloc) ha_tina(table); - return NULL; -#endif -#ifdef HAVE_NDBCLUSTER_DB - case DB_TYPE_NDBCLUSTER: - if (have_ndbcluster == SHOW_OPTION_YES) - return new (alloc) ha_ndbcluster(table); - return NULL; -#endif - case DB_TYPE_HEAP: - return new (alloc) ha_heap(table); - default: // should never happen + handler *file= NULL; + /* + handlers are allocated with new in the handlerton create() function + we need to set the thd mem_root for these to be allocated correctly + */ + THD *thd= current_thd; + MEM_ROOT 
*thd_save_mem_root= thd->mem_root; + thd->mem_root= alloc; + + if (db_type != NULL && db_type->state == SHOW_OPTION_YES && db_type->create) + file= db_type->create(share); + + thd->mem_root= thd_save_mem_root; + + if (!file) { - enum db_type def=(enum db_type) current_thd->variables.table_type; + handlerton *def= current_thd->variables.table_type; /* Try first with 'default table type' */ if (db_type != def) - return get_new_handler(table, alloc, def); + return get_new_handler(share, alloc, def); + } + if (file) + { + if (file->ha_initialise()) + { + delete file; + file=0; + } + } + return file; +} + + +#ifdef WITH_PARTITION_STORAGE_ENGINE +handler *get_ha_partition(partition_info *part_info) +{ + ha_partition *partition; + DBUG_ENTER("get_ha_partition"); + if ((partition= new ha_partition(part_info))) + { + if (partition->ha_initialise()) + { + delete partition; + partition= 0; + } } - /* Fall back to MyISAM */ - case DB_TYPE_MYISAM: - return new (alloc) ha_myisam(table); + else + { + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(ha_partition)); } + DBUG_RETURN(((handler*) partition)); } +#endif + /* Register handler error messages for use with my_error(). @@ -425,6 +362,7 @@ static int ha_init_errors(void) SETMSG(HA_ERR_TABLE_EXIST, ER(ER_TABLE_EXISTS_ERROR)); SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine"); SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER(ER_TABLE_DEF_CHANGED)); + SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key"); SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER(ER_TABLE_NEEDS_UPGRADE)); /* Register the error messages for use with my_error(). */ @@ -455,16 +393,55 @@ static int ha_finish_errors(void) } -static inline void ha_was_inited_ok(handlerton **ht) +static void ha_was_inited_ok(handlerton *ht) { - uint tmp= (*ht)->savepoint_offset; - (*ht)->savepoint_offset= savepoint_alloc_size; + uint tmp= ht->savepoint_offset; + ht->savepoint_offset= savepoint_alloc_size; savepoint_alloc_size+= tmp; - (*ht)->slot= total_ha++; - if ((*ht)->prepare) + ht->slot= total_ha++; + if (ht->prepare) total_ha_2pc++; } + +int ha_initialize_handlerton(handlerton *hton) +{ + DBUG_ENTER("ha_initialize_handlerton"); + + if (hton == NULL) + DBUG_RETURN(1); + + switch (hton->state) + { + case SHOW_OPTION_NO: + break; + case SHOW_OPTION_YES: + if (!hton->init || !hton->init()) + { + ha_was_inited_ok(hton); + break; + } + /* fall through */ + default: + hton->state= SHOW_OPTION_DISABLED; + break; + } + DBUG_RETURN(0); +} + + +static my_bool init_handlerton(THD *unused1, st_plugin_int *plugin, + void *unused2) +{ + if (plugin->state == PLUGIN_IS_UNINITIALIZED) + { + ha_initialize_handlerton((handlerton *) plugin->plugin->info); + plugin->state= PLUGIN_IS_READY; + } + return FALSE; +} + + int ha_init() { int error= 0; @@ -475,16 +452,8 @@ int ha_init() if (ha_init_errors()) return 1; - /* - We now initialize everything here. 
- */ - for (types= sys_table_types; *types; types++) - { - if (!(*types)->init || !(*types)->init()) - ha_was_inited_ok(types); - else - (*types)->state= SHOW_OPTION_DISABLED; - } + if (plugin_foreach(NULL, init_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + return 1; DBUG_ASSERT(total_ha < MAX_HA); /* @@ -497,70 +466,97 @@ int ha_init() return error; } + +int ha_register_builtin_plugins() +{ + handlerton **hton; + uint size= 0; + struct st_mysql_plugin *plugin; + DBUG_ENTER("ha_register_builtin_plugins"); + + for (hton= sys_table_types; *hton; hton++) + size+= sizeof(struct st_mysql_plugin); + + if (!(plugin= (struct st_mysql_plugin *) + my_once_alloc(size, MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(1); + + for (hton= sys_table_types; *hton; hton++, plugin++) + { + plugin->type= MYSQL_STORAGE_ENGINE_PLUGIN; + plugin->info= *hton; + plugin->version= 0; + plugin->name= (*hton)->name; + plugin->author= NULL; + plugin->descr= (*hton)->comment; + + if (plugin_register_builtin(plugin)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + + + /* close, flush or restart databases */ /* Ignore this for other databases than ours */ +static my_bool panic_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->panic) + ((int*)arg)[0]|= hton->panic((enum ha_panic_function)((int*)arg)[1]); + return FALSE; +} + + int ha_panic(enum ha_panic_function flag) { - int error=0; -#ifndef NO_HASH - error|=h_panic(flag); /* fix hash */ -#endif -#ifdef HAVE_ISAM - error|=mrg_panic(flag); - error|=nisam_panic(flag); -#endif - error|=heap_panic(flag); - error|=mi_panic(flag); - error|=myrg_panic(flag); -#ifdef HAVE_BERKELEY_DB - if (have_berkeley_db == SHOW_OPTION_YES) - error|=berkeley_end(); -#endif -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - error|=innobase_end(); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - error|=ndbcluster_end(); -#endif -#ifdef HAVE_FEDERATED_DB - if (have_federated_db == SHOW_OPTION_YES) - error|= federated_db_end(); -#endif -#if defined(HAVE_ARCHIVE_DB) - if (have_archive_db == SHOW_OPTION_YES) - error|= archive_db_end(); -#endif -#ifdef HAVE_CSV_DB - if (have_csv_db == SHOW_OPTION_YES) - error|= tina_end(); -#endif - if (ha_finish_errors()) - error= 1; - return error; + int error[2]; + + error[0]= 0; error[1]= (int)flag; + plugin_foreach(NULL, panic_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, error); + + if (flag == HA_PANIC_CLOSE && ha_finish_errors()) + error[0]= 1; + return error[0]; } /* ha_panic */ +static my_bool dropdb_handlerton(THD *unused1, st_plugin_int *plugin, + void *path) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->drop_database) + hton->drop_database((char *)path); + return FALSE; +} + + void ha_drop_database(char* path) { -#ifdef HAVE_INNOBASE_DB - if (have_innodb == SHOW_OPTION_YES) - innobase_drop_database(path); -#endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - ndbcluster_drop_database(path); -#endif + plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path); } + +static my_bool closecon_handlerton(THD *thd, st_plugin_int *plugin, + void *unused) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + /* there's no need to rollback here as all transactions must + be rolled back already */ + if (hton->state == SHOW_OPTION_YES && hton->close_connection && + thd->ha_data[hton->slot]) + 
hton->close_connection(thd); + return FALSE; +} + + /* don't bother to rollback here, it's done already */ void ha_close_connection(THD* thd) { - handlerton **types; - for (types= sys_table_types; *types; types++) - if (thd->ha_data[(*types)->slot]) - (*types)->close_connection(thd); + plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0); } /* ======================================================================== @@ -870,21 +866,46 @@ int ha_autocommit_or_rollback(THD *thd, int error) } -int ha_commit_or_rollback_by_xid(XID *xid, bool commit) +struct xahton_st { + XID *xid; + int result; +}; + +static my_bool xacommit_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) { - handlerton **types; - int res= 1; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->recover) + { + hton->commit_by_xid(((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; + } + return FALSE; +} - for (types= sys_table_types; *types; types++) +static my_bool xarollback_handlerton(THD *unused1, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->recover) { - if ((*types)->state == SHOW_OPTION_YES && (*types)->recover) - { - if ((*(commit ? (*types)->commit_by_xid : - (*types)->rollback_by_xid))(xid)); - res= 0; - } + hton->rollback_by_xid(((struct xahton_st *)arg)->xid); + ((struct xahton_st *)arg)->result= 0; } - return res; + return FALSE; +} + + +int ha_commit_or_rollback_by_xid(XID *xid, bool commit) +{ + struct xahton_st xaop; + xaop.xid= xid; + xaop.result= 1; + + plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &xaop); + + return xaop.result; } @@ -960,99 +981,123 @@ static char* xid_to_str(char *buf, XID *xid) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. */ -int ha_recover(HASH *commit_list) -{ - int len, got, found_foreign_xids=0, found_my_xids=0; - handlerton **types; - XID *list=0; - bool dry_run=(commit_list==0 && tc_heuristic_recover==0); - DBUG_ENTER("ha_recover"); - - /* commit_list and tc_heuristic_recover cannot be set both */ - DBUG_ASSERT(commit_list==0 || tc_heuristic_recover==0); - /* if either is set, total_ha_2pc must be set too */ - DBUG_ASSERT(dry_run || total_ha_2pc>(ulong)opt_bin_log); - if (total_ha_2pc <= (ulong)opt_bin_log) - DBUG_RETURN(0); - - if (commit_list) - sql_print_information("Starting crash recovery..."); - -#ifndef WILL_BE_DELETED_LATER - /* - for now, only InnoDB supports 2pc. 
It means we can always safely - rollback all pending transactions, without risking inconsistent data - */ - DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog - tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK - dry_run=FALSE; -#endif - - for (len= MAX_XID_LIST_SIZE ; list==0 && len > MIN_XID_LIST_SIZE; len/=2) - { - list=(XID *)my_malloc(len*sizeof(XID), MYF(0)); - } - if (!list) - { - sql_print_error(ER(ER_OUTOFMEMORY), len*sizeof(XID)); - DBUG_RETURN(1); - } +struct xarecover_st +{ + int len, found_foreign_xids, found_my_xids; + XID *list; + HASH *commit_list; + bool dry_run; +}; - for (types= sys_table_types; *types; types++) +static my_bool xarecover_handlerton(THD *unused, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + struct xarecover_st *info= (struct xarecover_st *) arg; + int got; + + if (hton->state == SHOW_OPTION_YES && hton->recover) { - if ((*types)->state != SHOW_OPTION_YES || !(*types)->recover) - continue; - while ((got=(*(*types)->recover)(list, len)) > 0 ) + while ((got= hton->recover(info->list, info->len)) > 0 ) { sql_print_information("Found %d prepared transaction(s) in %s", - got, (*types)->name); + got, hton->name); for (int i=0; i < got; i ++) { - my_xid x=list[i].get_my_xid(); + my_xid x=info->list[i].get_my_xid(); if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("ignore xid %s", xid_to_str(buf, list+i)); + sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i)); #endif - xid_cache_insert(list+i, XA_PREPARED); - found_foreign_xids++; + xid_cache_insert(info->list+i, XA_PREPARED); + info->found_foreign_xids++; continue; } - if (dry_run) + if (info->dry_run) { - found_my_xids++; + info->found_my_xids++; continue; } // recovery mode - if (commit_list ? - hash_search(commit_list, (byte *)&x, sizeof(x)) != 0 : + if (info->commit_list ? 
+ hash_search(info->commit_list, (byte *)&x, sizeof(x)) != 0 : tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT) { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("commit xid %s", xid_to_str(buf, list+i)); + sql_print_information("commit xid %s", xid_to_str(buf, info->list+i)); #endif - (*(*types)->commit_by_xid)(list+i); + hton->commit_by_xid(info->list+i); } else { #ifndef DBUG_OFF char buf[XIDDATASIZE*4+6]; // see xid_to_str - sql_print_information("rollback xid %s", xid_to_str(buf, list+i)); + sql_print_information("rollback xid %s", + xid_to_str(buf, info->list+i)); #endif - (*(*types)->rollback_by_xid)(list+i); + hton->rollback_by_xid(info->list+i); } } - if (got < len) + if (got < info->len) break; } } - my_free((gptr)list, MYF(0)); - if (found_foreign_xids) - sql_print_warning("Found %d prepared XA transactions", found_foreign_xids); - if (dry_run && found_my_xids) + return FALSE; +} + +int ha_recover(HASH *commit_list) +{ + struct xarecover_st info; + DBUG_ENTER("ha_recover"); + info.found_foreign_xids= info.found_my_xids= 0; + info.commit_list= commit_list; + info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); + info.list= NULL; + + /* commit_list and tc_heuristic_recover cannot be set both */ + DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); + /* if either is set, total_ha_2pc must be set too */ + DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log); + + if (total_ha_2pc <= (ulong)opt_bin_log) + DBUG_RETURN(0); + + if (info.commit_list) + sql_print_information("Starting crash recovery..."); + +#ifndef WILL_BE_DELETED_LATER + /* + for now, only InnoDB supports 2pc. It means we can always safely + rollback all pending transactions, without risking inconsistent data + */ + DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog + tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK + info.dry_run=FALSE; +#endif + + for (info.len= MAX_XID_LIST_SIZE ; + info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2) + { + info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0)); + } + if (!info.list) + { + sql_print_error(ER(ER_OUTOFMEMORY), info.len*sizeof(XID)); + DBUG_RETURN(1); + } + + plugin_foreach(NULL, xarecover_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &info); + + my_free((gptr)info.list, MYF(0)); + if (info.found_foreign_xids) + sql_print_warning("Found %d prepared XA transactions", + info.found_foreign_xids); + if (info.dry_run && info.found_my_xids) { sql_print_error("Found %d prepared transactions! It means that mysqld was " "not shut down properly last time and critical recovery " @@ -1060,10 +1105,10 @@ int ha_recover(HASH *commit_list) "after a crash. You have to start mysqld with " "--tc-heuristic-recover switch to commit or rollback " "pending transactions.", - found_my_xids, opt_tc_log_file); + info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } - if (commit_list) + if (info.commit_list) sql_print_information("Crash recovery finished."); DBUG_RETURN(0); } @@ -1136,24 +1181,8 @@ bool mysql_xa_recover(THD *thd) int ha_release_temporary_latches(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innobase_release_temporary_latches(thd); -#endif - return 0; -} - - -/* - Export statistics for different engines. Currently we use it only for - InnoDB. 
-*/ - -int ha_update_statistics() -{ -#ifdef HAVE_INNOBASE_DB - if (opt_innodb) - innodb_export_status(); +#ifdef WITH_INNOBASE_STORAGE_ENGINE + innobase_release_temporary_latches(thd); #endif return 0; } @@ -1182,7 +1211,8 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } - statistic_increment(thd->status_var.ha_savepoint_rollback_count,&LOCK_status); + statistic_increment(thd->status_var.ha_savepoint_rollback_count, + &LOCK_status); trans->no_2pc|=(*ht)->prepare == 0; } /* @@ -1262,38 +1292,62 @@ int ha_release_savepoint(THD *thd, SAVEPOINT *sv) } +static my_bool snapshot_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) +{ + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && + hton->start_consistent_snapshot) + { + hton->start_consistent_snapshot(thd); + *((bool *)arg)= false; + } + return FALSE; +} + int ha_start_consistent_snapshot(THD *thd) { -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - !innobase_start_trx_and_assign_read_view(thd)) - return 0; -#endif + bool warn= true; + + plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn); + /* Same idea as when one wants to CREATE TABLE in one engine which does not exist: */ - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "This MySQL server does not support any " - "consistent-read capable storage engine"); + if (warn) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "This MySQL server does not support any " + "consistent-read capable storage engine"); return 0; } -bool ha_flush_logs() +static my_bool flush_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) { - bool result=0; -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && - berkeley_flush_logs()) - result=1; -#endif -#ifdef HAVE_INNOBASE_DB - if ((have_innodb == SHOW_OPTION_YES) && - innobase_flush_logs()) - result=1; -#endif - return result; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->flush_logs && hton->flush_logs()) + return TRUE; + return FALSE; +} + + +bool ha_flush_logs(handlerton *db_type) +{ + if (db_type == NULL) + { + if (plugin_foreach(NULL, flush_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + return TRUE; + } + else + { + if (db_type->state != SHOW_OPTION_YES || + (db_type->flush_logs && db_type->flush_logs())) + return TRUE; + } + return FALSE; } /* @@ -1301,8 +1355,8 @@ bool ha_flush_logs() The .frm file will be deleted only if we return 0 or ENOENT */ -int ha_delete_table(THD *thd, enum db_type table_type, const char *path, - const char *alias, bool generate_warning) +int ha_delete_table(THD *thd, handlerton *table_type, const char *path, + const char *db, const char *alias, bool generate_warning) { handler *file; char tmp_path[FN_REFLEN]; @@ -1316,8 +1370,8 @@ int ha_delete_table(THD *thd, enum db_type table_type, const char *path, dummy_table.s= &dummy_share; /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */ - if (table_type == DB_TYPE_UNKNOWN || - ! (file=get_new_handler(&dummy_table, thd->mem_root, table_type))) + if (table_type == NULL || + ! 
(file=get_new_handler(&dummy_share, thd->mem_root, table_type))) DBUG_RETURN(ENOENT); if (lower_case_table_names == 2 && !(file->table_flags() & HA_FILE_BASED)) @@ -1350,7 +1404,12 @@ int ha_delete_table(THD *thd, enum db_type table_type, const char *path, thd->net.last_error[0]= 0; /* Fill up structures that print_error may need */ - dummy_table.s->path= path; + dummy_share.path.str= (char*) path; + dummy_share.path.length= strlen(path); + dummy_share.db.str= (char*) db; + dummy_share.db.length= strlen(db); + dummy_share.table_name.str= (char*) alias; + dummy_share.table_name.length= strlen(alias); dummy_table.alias= alias; file->print_error(error, 0); @@ -1372,16 +1431,32 @@ int ha_delete_table(THD *thd, enum db_type table_type, const char *path, ** General handler functions ****************************************************************************/ - /* Open database-handler. Try O_RDONLY if can't open as O_RDWR */ - /* Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set */ -int handler::ha_open(const char *name, int mode, int test_if_locked) +void handler::ha_statistic_increment(ulong SSV::*offset) const +{ + statistic_increment(table->in_use->status_var.*offset, &LOCK_status); +} + +/* + Open database-handler. + + IMPLEMENTATION + Try O_RDONLY if cannot open as O_RDWR + Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set +*/ + +int handler::ha_open(TABLE *table_arg, const char *name, int mode, + int test_if_locked) { int error; DBUG_ENTER("handler::ha_open"); - DBUG_PRINT("enter",("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d", - name, table->s->db_type, table->db_stat, mode, - test_if_locked)); + DBUG_PRINT("enter", + ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d", + name, table_share->db_type, table_arg->db_stat, mode, + test_if_locked)); + + table= table_arg; + DBUG_ASSERT(table->s == table_share); if ((error=open(name,mode,test_if_locked))) { @@ -1394,7 +1469,7 @@ int handler::ha_open(const char *name, int mode, int test_if_locked) } if (error) { - my_errno=error; /* Safeguard */ + my_errno= error; /* Safeguard */ DBUG_PRINT("error",("error: %d errno: %d",error,errno)); } else @@ -1412,10 +1487,93 @@ int handler::ha_open(const char *name, int mode, int test_if_locked) } else dupp_ref=ref+ALIGN_SIZE(ref_length); + + if (ha_allocate_read_write_set(table->s->fields)) + error= 1; } DBUG_RETURN(error); } + +int handler::ha_initialise() +{ + DBUG_ENTER("ha_initialise"); + DBUG_RETURN(FALSE); +} + + +/* + Initialize bit maps for used fields + + Called from open_table_from_share() +*/ + +int handler::ha_allocate_read_write_set(ulong no_fields) +{ + uint bitmap_size= bitmap_buffer_size(no_fields+1); + uint32 *read_buf, *write_buf; + DBUG_ENTER("ha_allocate_read_write_set"); + DBUG_PRINT("enter", ("no_fields = %d", no_fields)); + + if (!multi_alloc_root(&table->mem_root, + &read_set, sizeof(MY_BITMAP), + &write_set, sizeof(MY_BITMAP), + &read_buf, bitmap_size, + &write_buf, bitmap_size, + NullS)) + { + DBUG_RETURN(TRUE); + } + bitmap_init(read_set, read_buf, no_fields+1, FALSE); + bitmap_init(write_set, write_buf, no_fields+1, FALSE); + table->read_set= read_set; + table->write_set= write_set; + ha_clear_all_set(); + DBUG_RETURN(FALSE); } + +void handler::ha_clear_all_set() +{ + DBUG_ENTER("ha_clear_all_set"); + bitmap_clear_all(read_set); + bitmap_clear_all(write_set); + bitmap_set_bit(read_set, 0); + bitmap_set_bit(write_set, 0); + DBUG_VOID_RETURN; +} + +int handler::ha_retrieve_all_cols() +{ + DBUG_ENTER("handler::ha_retrieve_all_cols");
bitmap_set_all(read_set); + DBUG_RETURN(0); +} + +int handler::ha_retrieve_all_pk() +{ + DBUG_ENTER("ha_retrieve_all_pk"); + ha_set_primary_key_in_read_set(); + DBUG_RETURN(0); +} + +void handler::ha_set_primary_key_in_read_set() +{ + ulong prim_key= table->s->primary_key; + DBUG_ENTER("handler::ha_set_primary_key_in_read_set"); + DBUG_PRINT("info", ("Primary key = %d", prim_key)); + if (prim_key != MAX_KEY) + { + KEY_PART_INFO *key_part= table->key_info[prim_key].key_part; + KEY_PART_INFO *key_part_end= key_part + + table->key_info[prim_key].key_parts; + for (;key_part != key_part_end; ++key_part) + ha_set_bit_in_read_set(key_part->fieldnr); + } + DBUG_VOID_RETURN; +} + + + /* Read first row (only) from a table This is never called for InnoDB or BDB tables, as these table types @@ -1427,7 +1585,8 @@ int handler::read_first_row(byte * buf, uint primary_key) register int error; DBUG_ENTER("handler::read_first_row"); - statistic_increment(current_thd->status_var.ha_read_first_count,&LOCK_status); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); /* If there is very few deleted rows in the table, find the first row by @@ -1444,7 +1603,7 @@ int handler::read_first_row(byte * buf, uint primary_key) else { /* Find the first row through the primary key */ - (void) ha_index_init(primary_key); + (void) ha_index_init(primary_key, 0); error=index_first(buf); (void) ha_index_end(); } @@ -1628,7 +1787,7 @@ ulonglong handler::get_auto_increment() int error; (void) extra(HA_EXTRA_KEYREAD); - index_init(table->s->next_number_index); + index_init(table->s->next_number_index, 1); if (!table->s->next_number_key_offset) { // Autoincrement at key-start error=index_last(table->record[1]); @@ -1693,9 +1852,10 @@ void handler::print_error(int error, myf errflag) uint key_nr=get_dup_key(error); if ((int) key_nr >= 0) { - /* Write the dupplicated key in the error message */ + /* Write the duplicated key in the error message */ char key[MAX_KEY_LENGTH]; String str(key,sizeof(key),system_charset_info); + /* Table is opened and defined at this point */ key_unpack(&str,table,(uint) key_nr); uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(ER(ER_DUP_ENTRY)); if (str.length() >= max_length) @@ -1703,12 +1863,35 @@ void handler::print_error(int error, myf errflag) str.length(max_length-4); str.append(STRING_WITH_LEN("...")); } - my_error(ER_DUP_ENTRY, MYF(0), str.c_ptr(), key_nr+1); + my_error(ER_DUP_ENTRY, MYF(0), str.c_ptr(), table->key_info[key_nr].name); DBUG_VOID_RETURN; } textno=ER_DUP_KEY; break; } + case HA_ERR_FOREIGN_DUPLICATE_KEY: + { + uint key_nr= get_dup_key(error); + if ((int) key_nr >= 0) + { + /* Write the key in the error message */ + char key[MAX_KEY_LENGTH]; + String str(key,sizeof(key),system_charset_info); + /* Table is opened and defined at this point */ + key_unpack(&str,table,(uint) key_nr); + uint max_length= MYSQL_ERRMSG_SIZE-(uint) strlen(ER(ER_FOREIGN_DUPLICATE_KEY)); + if (str.length() >= max_length) + { + str.length(max_length-4); + str.append(STRING_WITH_LEN("...")); + } + my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str, + str.c_ptr(), key_nr+1); + DBUG_VOID_RETURN; + } + textno= ER_DUP_KEY; + break; + } case HA_ERR_NULL_IN_SPATIAL: textno= ER_UNKNOWN_ERROR; break; @@ -1782,19 +1965,20 @@ void handler::print_error(int error, myf errflag) textno=ER_TABLE_DEF_CHANGED; break; case HA_ERR_NO_SUCH_TABLE: - { - /* - We have to use path to find database name instead of using - table->table_cache_key because if the table didn't exist, then - 
table_cache_key was not set up - */ - char *db; - char buff[FN_REFLEN]; - uint length= dirname_part(buff,table->s->path); - buff[length-1]=0; - db=buff+dirname_length(buff); - my_error(ER_NO_SUCH_TABLE, MYF(0), db, table->alias); + my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str, + table_share->table_name.str); break; + case HA_ERR_RBR_LOGGING_FAILED: + textno= ER_BINLOG_ROW_LOGGING_FAILED; + break; + case HA_ERR_DROP_INDEX_FK: + { + const char *ptr= "???"; + uint key_nr= get_dup_key(error); + if ((int) key_nr >= 0) + ptr= table->key_info[key_nr].name; + my_error(ER_DROP_INDEX_FK, MYF(0), ptr); + DBUG_VOID_RETURN; } case HA_ERR_TABLE_NEEDS_UPGRADE: textno=ER_TABLE_NEEDS_UPGRADE; @@ -1819,7 +2003,7 @@ void handler::print_error(int error, myf errflag) DBUG_VOID_RETURN; } } - my_error(textno, errflag, table->alias, error); + my_error(textno, errflag, table_share->table_name.str, error); DBUG_VOID_RETURN; } @@ -1901,10 +2085,7 @@ static bool update_frm_version(TABLE *table, bool needs_lock) if (table->s->mysql_version != MYSQL_VERSION_ID) DBUG_RETURN(0); - strxnmov(path, sizeof(path)-1, mysql_data_home, "/", table->s->db, "/", - table->s->table_name, reg_ext, NullS); - if (!unpack_filename(path, path)) - DBUG_RETURN(1); + strxmov(path, table->s->normalized_path.str, reg_ext, NullS); if (needs_lock) pthread_mutex_lock(&LOCK_open); @@ -1912,8 +2093,8 @@ static bool update_frm_version(TABLE *table, bool needs_lock) if ((file= my_open(path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0) { uchar version[4]; - char *key= table->s->table_cache_key; - uint key_length= table->s->key_length; + char *key= table->s->table_cache_key.str; + uint key_length= table->s->table_cache_key.length; TABLE *entry; HASH_SEARCH_STATE state; @@ -1943,8 +2124,9 @@ uint handler::get_dup_key(int error) { DBUG_ENTER("handler::get_dup_key"); table->file->errkey = (uint) -1; - if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE || - error == HA_ERR_NULL_IN_SPATIAL) + if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY || + error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL || + error == HA_ERR_DROP_INDEX_FK) info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK); DBUG_RETURN(table->file->errkey); } @@ -1975,7 +2157,7 @@ int handler::delete_table(const char *name) for (const char **ext=bas_ext(); *ext ; ext++) { - fn_format(buff, name, "", *ext, 2 | 4); + fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT); if (my_delete_with_symlink(buff, MYF(0))) { if ((error= my_errno) != ENOENT) @@ -2005,6 +2187,13 @@ int handler::rename_table(const char * from, const char * to) } +void handler::drop_table(const char *name) +{ + close(); + delete_table(name); +} + + /* Performs checks upon the table. 
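The handler::delete_table() default above removes one file per extension returned by the engine's bas_ext() list; the fn_format() call now spells the flags MY_UNPACK_FILENAME|MY_APPEND_EXT instead of the magic value 2 | 4. A minimal sketch of the engine side of that contract, for a hypothetical engine whose on-disk files are <name>.dat and <name>.idx (the engine and file names are illustrative, not part of this patch):

    /* Hypothetical engine: one data file and one index file per table. */
    static const char *ha_demo_exts[]= { ".dat", ".idx", NullS };

    const char **ha_demo::bas_ext() const
    {
      /* handler::delete_table() and rename_table() iterate this list */
      return ha_demo_exts;
    }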
@@ -2099,29 +2288,64 @@ int handler::index_next_same(byte *buf, const byte *key, uint keylen) } +void handler::get_dynamic_partition_info(PARTITION_INFO *stat_info, uint part_id) +{ + info(HA_STATUS_CONST | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_NO_LOCK); + stat_info->records= records; + stat_info->mean_rec_length= mean_rec_length; + stat_info->data_file_length= data_file_length; + stat_info->max_data_file_length= max_data_file_length; + stat_info->index_file_length= index_file_length; + stat_info->delete_length= delete_length; + stat_info->create_time= create_time; + stat_info->update_time= update_time; + stat_info->check_time= check_time; + stat_info->check_sum= 0; + if (table_flags() & (ulong) HA_HAS_CHECKSUM) + stat_info->check_sum= checksum(); + return; +} + + /**************************************************************************** ** Some general functions that aren't in the handler class ****************************************************************************/ /* Initiates table-file and calls appropriate database-creator - Returns 1 if something got wrong + + NOTES + We must have a write lock on LOCK_open to be sure no other thread + interferes with the table + + RETURN + 0 ok + 1 error */ -int ha_create_table(const char *name, HA_CREATE_INFO *create_info, +int ha_create_table(THD *thd, const char *path, + const char *db, const char *table_name, + HA_CREATE_INFO *create_info, bool update_create_info) { - int error; + int error= 1; TABLE table; char name_buff[FN_REFLEN]; + const char *name; + TABLE_SHARE share; DBUG_ENTER("ha_create_table"); + + init_tmp_table_share(&share, db, 0, table_name, path); + if (open_table_def(thd, &share, 0) || + open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table, + TRUE)) + goto err; - if (openfrm(current_thd, name,"",0,(uint) READ_ALL, 0, &table)) - DBUG_RETURN(1); if (update_create_info) - { update_create_info_from_table(create_info, &table); - } + + name= share.path.str; if (lower_case_table_names == 2 && !(table.file->table_flags() & HA_FILE_BASED)) { @@ -2131,27 +2355,32 @@ int ha_create_table(const char *name, HA_CREATE_INFO *create_info, name= name_buff; } - error=table.file->create(name,&table,create_info); - VOID(closefrm(&table)); + error= table.file->create(name, &table, create_info); + VOID(closefrm(&table, 0)); if (error) - my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name,error); + { + strxmov(name_buff, db, ".", table_name, NullS); + my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error); + } +err: + free_table_share(&share); DBUG_RETURN(error != 0); }
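ha_create_table() above replaces the old openfrm() call with an explicit TABLE_SHARE lifecycle. A condensed sketch of that open/use/release shape, assuming only the 5.1-era signatures visible in this hunk (error handling reduced to a single exit):

    /* Sketch of the share lifecycle used by ha_create_table() above. */
    int with_tmp_share(THD *thd, const char *db, const char *table_name,
                       const char *path)
    {
      TABLE_SHARE share;
      TABLE table;
      int error= 1;
      init_tmp_table_share(&share, db, 0, table_name, path);
      if (open_table_def(thd, &share, 0) ||
          open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0,
                                &table, TRUE))
        goto err;                     /* share is still released below */
      /* ... work with 'table' here ... */
      VOID(closefrm(&table, 0));      /* 0 = do not free the share yet */
      error= 0;
    err:
      free_table_share(&share);       /* always pairs with init_tmp_table_share */
      return error;
    }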
/* - Try to discover table from engine and - if found, write the frm file to disk. + Try to discover table from engine + + NOTES + If found, write the frm file to disk. RETURN VALUES: - -1 : Table did not exists - 0 : Table created ok - > 0 : Error, table existed but could not be created + -1 Table did not exist + 0 Table created ok + > 0 Error, table existed but could not be created */ -int ha_create_table_from_engine(THD* thd, - const char *db, - const char *name) +int ha_create_table_from_engine(THD* thd, const char *db, const char *name) { int error; const void *frmblob; @@ -2159,6 +2388,7 @@ char path[FN_REFLEN]; HA_CREATE_INFO create_info; TABLE table; + TABLE_SHARE share; DBUG_ENTER("ha_create_table_from_engine"); DBUG_PRINT("enter", ("name '%s'.'%s'", db, name)); @@ -2174,15 +2404,23 @@ frmblob and frmlen are set, write the frm to disk */ - (void)strxnmov(path,FN_REFLEN,mysql_data_home,"/",db,"/",name,NullS); + (void)strxnmov(path,FN_REFLEN-1,mysql_data_home,"/",db,"/",name,NullS); // Save the frm file error= writefrm(path, frmblob, frmlen); my_free((char*) frmblob, MYF(0)); if (error) DBUG_RETURN(2); - if (openfrm(thd, path,"",0,(uint) READ_ALL, 0, &table)) + init_tmp_table_share(&share, db, 0, name, path); + if (open_table_def(thd, &share, 0)) + { + DBUG_RETURN(3); + } + if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, FALSE)) + { + free_table_share(&share); DBUG_RETURN(3); + } update_create_info_from_table(&create_info, &table); create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE; @@ -2194,7 +2432,7 @@ my_casedn_str(files_charset_info, path); } error=table.file->create(path,&table,&create_info); - VOID(closefrm(&table)); + VOID(closefrm(&table, 1)); DBUG_RETURN(error != 0); } @@ -2312,7 +2550,7 @@ int ha_discover(THD *thd, const char *db, const char *name, DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */ DBUG_RETURN(error); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_discover(thd, db, name, frmblob, frmlen); #endif @@ -2336,7 +2574,7 @@ ha_find_files(THD *thd,const char *db,const char *path, DBUG_ENTER("ha_find_files"); DBUG_PRINT("enter", ("db: %s, path: %s, wild: %s, dir: %d", db, path, wild, dir)); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_find_files(thd, db, path, wild, dir, files); #endif @@ -2358,7 +2596,7 @@ int ha_table_exists_in_engine(THD* thd, const char* db, const char* name) int error= 0; DBUG_ENTER("ha_table_exists_in_engine"); DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (have_ndbcluster == SHOW_OPTION_YES) error= ndbcluster_table_exists_in_engine(thd, db, name); #endif @@ -2366,6 +2604,143 @@ int ha_table_exists_in_engine(THD* thd, const char* db, const char* name) DBUG_RETURN(error); } +#ifdef HAVE_NDB_BINLOG +/* + TODO: change this into a dynamic struct + List<handlerton> does not work as + 1. binlog_end is called when MEM_ROOT is gone + 2. 
cannot work with thd MEM_ROOT as memory should be freed +*/ +#define MAX_HTON_LIST_ST 63 +struct hton_list_st +{ + handlerton *hton[MAX_HTON_LIST_ST]; + uint sz; +}; + +struct binlog_func_st +{ + enum_binlog_func fn; + void *arg; +}; + +/* + Listing handlertons first to avoid recursive calls and deadlock +*/ +static my_bool binlog_func_list(THD *thd, st_plugin_int *plugin, void *arg) +{ + hton_list_st *hton_list= (hton_list_st *)arg; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->binlog_func) + { + uint sz= hton_list->sz; + if (sz == MAX_HTON_LIST_ST-1) + { + /* list full */ + return FALSE; + } + hton_list->hton[sz]= hton; + hton_list->sz= sz+1; + } + return FALSE; +} + +static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn) +{ + handlerton *hton; + hton_list_st hton_list; + hton_list.sz= 0; + plugin_foreach(thd, binlog_func_list, + MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list); + + uint i= 0, sz= hton_list.sz; + while(i < sz) + hton_list.hton[i++]->binlog_func(thd, bfn->fn, bfn->arg); + return FALSE; +} + +int ha_reset_logs(THD *thd) +{ + binlog_func_st bfn= {BFN_RESET_LOGS, 0}; + binlog_func_foreach(thd, &bfn); + return 0; +} + +void ha_reset_slave(THD* thd) +{ + binlog_func_st bfn= {BFN_RESET_SLAVE, 0}; + binlog_func_foreach(thd, &bfn); +} + +void ha_binlog_wait(THD* thd) +{ + binlog_func_st bfn= {BFN_BINLOG_WAIT, 0}; + binlog_func_foreach(thd, &bfn); +} + +int ha_binlog_end(THD* thd) +{ + binlog_func_st bfn= {BFN_BINLOG_END, 0}; + binlog_func_foreach(thd, &bfn); + return 0; +} + +int ha_binlog_index_purge_file(THD *thd, const char *file) +{ + binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file}; + binlog_func_foreach(thd, &bfn); + return 0; +} + +struct binlog_log_query_st +{ + enum_binlog_command binlog_command; + const char *query; + uint query_length; + const char *db; + const char *table_name; +}; + +static my_bool binlog_log_query_handlerton2(THD *thd, + const handlerton *hton, + void *args) +{ + struct binlog_log_query_st *b= (struct binlog_log_query_st*)args; + if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query) + hton->binlog_log_query(thd, + b->binlog_command, + b->query, + b->query_length, + b->db, + b->table_name); + return FALSE; +} + +static my_bool binlog_log_query_handlerton(THD *thd, + st_plugin_int *plugin, + void *args) +{ + return binlog_log_query_handlerton2(thd, (const handlerton *) plugin->plugin->info, args); +} + +void ha_binlog_log_query(THD *thd, const handlerton *hton, + enum_binlog_command binlog_command, + const char *query, uint query_length, + const char *db, const char *table_name) +{ + struct binlog_log_query_st b; + b.binlog_command= binlog_command; + b.query= query; + b.query_length= query_length; + b.db= db; + b.table_name= table_name; + if (hton == 0) + plugin_foreach(thd, binlog_log_query_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &b); + else + binlog_log_query_handlerton2(thd, hton, &b); +} +#endif /* Read the first row of a multi-range set. 
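Every hook in the HAVE_NDB_BINLOG block above uses the same dispatch idiom: plugin_foreach() walks the installed storage-engine plugins and hands each one to a callback, and the callback returns FALSE to keep the iteration going. A minimal sketch of that callback shape, modelled directly on binlog_func_list() above (the counter is illustrative):

    /* Sketch: count enabled engines with the plugin_foreach idiom. */
    static my_bool count_enabled_hton(THD *thd, st_plugin_int *plugin,
                                      void *arg)
    {
      handlerton *hton= (handlerton *) plugin->plugin->info;
      if (hton->state == SHOW_OPTION_YES)
        (*(uint *) arg)++;
      return FALSE;                             /* FALSE: keep iterating */
    }

    /* Usage:
         uint count= 0;
         plugin_foreach(thd, count_enabled_hton,
                        MYSQL_STORAGE_ENGINE_PLUGIN, &count);
    */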
@@ -2612,7 +2987,7 @@ int handler::compare_key(key_range *range) int handler::index_read_idx(byte * buf, uint index, const byte * key, uint key_len, enum ha_rkey_function find_flag) { - int error= ha_index_init(index); + int error= ha_index_init(index, 0); if (!error) error= index_read(buf, key, key_len, find_flag); if (!error) @@ -2636,40 +3011,50 @@ int handler::index_read_idx(byte * buf, uint index, const byte * key, pointer pointer to TYPELIB structure */ +static my_bool exts_handlerton(THD *unused, st_plugin_int *plugin, + void *arg) +{ + List<char> *found_exts= (List<char> *) arg; + handlerton *hton= (handlerton *) plugin->plugin->info; + handler *file; + if (hton->state == SHOW_OPTION_YES && hton->create && + (file= hton->create((TABLE_SHARE*) 0))) + { + List_iterator_fast<char> it(*found_exts); + const char **ext, *old_ext; + + for (ext= file->bas_ext(); *ext; ext++) + { + while ((old_ext= it++)) + { + if (!strcmp(old_ext, *ext)) + break; + } + if (!old_ext) + found_exts->push_back((char *) *ext); + + it.rewind(); + } + delete file; + } + return FALSE; +} + TYPELIB *ha_known_exts(void) { MEM_ROOT *mem_root= current_thd->mem_root; if (!known_extensions.type_names || mysys_usage_id != known_extensions_id) { - handlerton **types; List<char> found_exts; - List_iterator_fast<char> it(found_exts); const char **ext, *old_ext; known_extensions_id= mysys_usage_id; found_exts.push_back((char*) triggers_file_ext); found_exts.push_back((char*) trigname_file_ext); - for (types= sys_table_types; *types; types++) - { - if ((*types)->state == SHOW_OPTION_YES) - { - handler *file= get_new_handler(0, mem_root, - (enum db_type) (*types)->db_type); - for (ext= file->bas_ext(); *ext; ext++) - { - while ((old_ext= it++)) - { - if (!strcmp(old_ext, *ext)) - break; - } - if (!old_ext) - found_exts.push_back((char *) *ext); - - it.rewind(); - } - delete file; - } - } + + plugin_foreach(NULL, exts_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts); + ext= (const char **) my_once_alloc(sizeof(char *)* (found_exts.elements+1), MYF(MY_WME | MY_FAE)); @@ -2678,6 +3063,7 @@ TYPELIB *ha_known_exts(void) known_extensions.count= found_exts.elements; known_extensions.type_names= ext; + List_iterator_fast<char> it(found_exts); while ((old_ext= it++)) *ext++= old_ext; *ext= 0; @@ -2686,6 +3072,175 @@ TYPELIB *ha_known_exts(void) } +static bool stat_print(THD *thd, const char *type, uint type_len, + const char *file, uint file_len, + const char *status, uint status_len) +{ + Protocol *protocol= thd->protocol; + protocol->prepare_for_resend(); + protocol->store(type, type_len, system_charset_info); + protocol->store(file, file_len, system_charset_info); + protocol->store(status, status_len, system_charset_info); + if (protocol->write()) + return TRUE; + return FALSE; +} + + +static my_bool showstat_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) +{ + enum ha_stat_type stat= *(enum ha_stat_type *) arg; + handlerton *hton= (handlerton *) plugin->plugin->info; + if (hton->state == SHOW_OPTION_YES && hton->show_status && + hton->show_status(thd, stat_print, stat)) + return TRUE; + return FALSE; +} + +bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) +{ + List<Item> field_list; + Protocol *protocol= thd->protocol; + bool result; + + field_list.push_back(new Item_empty_string("Type",10)); + field_list.push_back(new Item_empty_string("Name",FN_REFLEN)); + field_list.push_back(new Item_empty_string("Status",10)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | 
Protocol::SEND_EOF)) + return TRUE; + + if (db_type == NULL) + { + result= plugin_foreach(thd, showstat_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, &stat); + } + else + { + if (db_type->state != SHOW_OPTION_YES) + result= stat_print(thd, db_type->name, strlen(db_type->name), + "", 0, "DISABLED", 8) ? 1 : 0; + else + result= db_type->show_status && + db_type->show_status(thd, stat_print, stat) ? 1 : 0; + } + + if (!result) + send_eof(thd); + return result; +} + +/* + Function to check if the conditions for row-based binlogging are + correct for the table. + + A row in the given table should be replicated if: + - Row-based replication is on + - It is not a temporary table + - The binlog is enabled + - The table shall be binlogged (binlog_*_db rules) +*/ + +#ifdef HAVE_ROW_BASED_REPLICATION +/* The Sun compiler cannot instantiate the template below if this is + declared static, but it works by putting it into an anonymous + namespace. */ +namespace { + bool check_table_binlog_row_based(THD *thd, TABLE *table) + { + return + thd && thd->current_stmt_binlog_row_based && + (thd->options & OPTION_BIN_LOG) && + (table->s->tmp_table == NO_TMP_TABLE) && + binlog_filter->db_ok(table->s->db.str); + } +} + +template<class RowsEventT> int binlog_log_row(TABLE* table, + const byte *before_record, + const byte *after_record) +{ + if (table->file->is_injective()) + return 0; + bool error= 0; + THD *const thd= current_thd; + + if (check_table_binlog_row_based(thd, table)) + { + MY_BITMAP cols; + /* Potential buffer on the stack for the bitmap */ + uint32 bitbuf[BITMAP_STACKBUF_SIZE/sizeof(uint32)]; + uint n_fields= table->s->fields; + my_bool use_bitbuf= n_fields <= sizeof(bitbuf)*8; + if (likely(!(error= bitmap_init(&cols, + use_bitbuf ? bitbuf : NULL, + (n_fields + 7) & ~7UL, + false)))) + { + bitmap_set_all(&cols); + error= + RowsEventT::binlog_row_logging_function(thd, table, + table->file->has_transactions(), + &cols, table->s->fields, + before_record, after_record); + if (!use_bitbuf) + bitmap_free(&cols); + } + } + return error ? HA_ERR_RBR_LOGGING_FAILED : 0; +} + + +/* + Instantiate the versions we need for the above template function, because we + have -fno-implicit-templates as a compiler option. 
+*/ + +template int binlog_log_row<Write_rows_log_event>(TABLE *, const byte *, const byte *); +template int binlog_log_row<Delete_rows_log_event>(TABLE *, const byte *, const byte *); +template int binlog_log_row<Update_rows_log_event>(TABLE *, const byte *, const byte *); + +#endif /* HAVE_ROW_BASED_REPLICATION */ + +int handler::ha_write_row(byte *buf) +{ + int error; + if (likely(!(error= write_row(buf)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Write_rows_log_event>(table, 0, buf); +#endif + } + return error; +} + +int handler::ha_update_row(const byte *old_data, byte *new_data) +{ + int error; + if (likely(!(error= update_row(old_data, new_data)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data); +#endif + } + return error; +} + +int handler::ha_delete_row(const byte *buf) +{ + int error; + if (likely(!(error= delete_row(buf)))) + { +#ifdef HAVE_ROW_BASED_REPLICATION + error= binlog_log_row<Delete_rows_log_event>(table, buf, 0); +#endif + } + return error; +} + + #ifdef HAVE_REPLICATION /* Reports to table handlers up to which position we have sent the binlog @@ -2708,11 +3263,10 @@ TYPELIB *ha_known_exts(void) int ha_repl_report_sent_binlog(THD *thd, char *log_file_name, my_off_t end_offset) { -#ifdef HAVE_INNOBASE_DB - return innobase_repl_report_sent_binlog(thd,log_file_name,end_offset); -#else - return 0; +#ifdef WITH_INNOBASE_STORAGE_ENGINE + innobase_repl_report_sent_binlog(thd, log_file_name, end_offset); #endif + return 0; } @@ -2737,3 +3291,4 @@ int ha_repl_report_replication_stop(THD *thd) return 0; } #endif /* HAVE_REPLICATION */ + diff --git a/sql/handler.h b/sql/handler.h index 977bd77a54e..d71841fe6b0 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -28,10 +28,7 @@ #define NO_HASH /* Not yet implemented */ #endif -#if defined(HAVE_BERKELEY_DB) || defined(HAVE_INNOBASE_DB) || \ - defined(HAVE_NDBCLUSTER_DB) #define USING_TRANSACTIONS -#endif // the following is for checking tables @@ -80,6 +77,13 @@ */ #define HA_CAN_INSERT_DELAYED (1 << 14) #define HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15) +/* + If HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS is set, it means that the engine can + do this: the position of an arbitrary record can be retrieved using + position() when the table has a primary key, effectively allowing random + access on the table based on a given record. +*/ +#define HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS (1 << 16) #define HA_NOT_DELETE_WITH_CACHE (1 << 18) #define HA_NO_PREFIX_CHAR_KEYS (1 << 20) #define HA_CAN_FULLTEXT (1 << 21) @@ -92,7 +96,7 @@ #define HA_CAN_BIT_FIELD (1 << 28) /* supports bit fields */ #define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */ #define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30) - +#define HA_NO_COPY_ON_ALTER (1 << 31) /* bits in index_flags(index_number) for what you can do with index */ #define HA_READ_NEXT 1 /* TODO really use this flag */ @@ -103,12 +107,64 @@ #define HA_KEYREAD_ONLY 64 /* Support HA_EXTRA_KEYREAD */ /* + bits in alter_table_flags: +*/ +/* + These bits are set if different kinds of indexes can be created + off-line without re-create of the table (but with a table lock). +*/ +#define HA_ONLINE_ADD_INDEX_NO_WRITES (1L << 0) /*add index w/lock*/ +#define HA_ONLINE_DROP_INDEX_NO_WRITES (1L << 1) /*drop index w/lock*/ +#define HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES (1L << 2) /*add unique w/lock*/ +#define HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES (1L << 3) /*drop uniq. 
w/lock*/ +#define HA_ONLINE_ADD_PK_INDEX_NO_WRITES (1L << 4) /*add prim. w/lock*/ +#define HA_ONLINE_DROP_PK_INDEX_NO_WRITES (1L << 5) /*drop prim. w/lock*/ +/* + These are set if different kinds of indexes can be created on-line + (without a table lock). If a handler is capable of one or more of + these, it should also set the corresponding *_NO_WRITES bit(s). +*/ +#define HA_ONLINE_ADD_INDEX (1L << 6) /*add index online*/ +#define HA_ONLINE_DROP_INDEX (1L << 7) /*drop index online*/ +#define HA_ONLINE_ADD_UNIQUE_INDEX (1L << 8) /*add unique online*/ +#define HA_ONLINE_DROP_UNIQUE_INDEX (1L << 9) /*drop uniq. online*/ +#define HA_ONLINE_ADD_PK_INDEX (1L << 10)/*add prim. online*/ +#define HA_ONLINE_DROP_PK_INDEX (1L << 11)/*drop prim. online*/ +/* + HA_PARTITION_FUNCTION_SUPPORTED indicates that the partitioning function is + supported at all. + HA_FAST_CHANGE_PARTITION means that optimised variants of the changes + exist but they are not necessarily done online. + + HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both + the new partition and to the old partitions when updating through the + old partitioning schema while performing a change of the partitioning. + This means that we can support updating of the table while performing + the copy phase of the change. To support no locking at all, a double + write from new to old must also exist; that is not required when this + flag is set. + This flag was actually removed before it was ever introduced. + The new idea is that handlers will handle the lock level already in + store_lock for ALTER TABLE partitions. + + HA_PARTITION_ONE_PHASE is a flag that can be set by handlers that take + care of changing the partitions online and in one phase. Thus all phases + needed to handle the change are implemented inside the storage engine. + The storage engine must also support auto-discovery since the frm file + is changed as part of the change and this change must be controlled by + the storage engine. A typical engine to support this is NDB (through + WL #2498). +*/ +#define HA_PARTITION_FUNCTION_SUPPORTED (1L << 12) +#define HA_FAST_CHANGE_PARTITION (1L << 13) +#define HA_PARTITION_ONE_PHASE (1L << 14) + +/* Index scan will not return records in rowid order. Not guaranteed to be set for unordered (e.g. HASH) indexes. */ #define HA_KEY_SCAN_NOT_ROR 128 - /* operations for disable/enable indexes */ #define HA_KEY_SWITCH_NONUNIQ 0 #define HA_KEY_SWITCH_ALL 1 @@ -121,17 +177,7 @@ example + csv + heap + blackhole + federated + 0 (yes, the sum is deliberately inaccurate) */ -#define MAX_HA 14 - -/* - Bits in index_ddl_flags(KEY *wanted_index) - for what ddl you can do with index - If none is set, the wanted type of index is not supported - by the handler at all. See WorkLog 1563. -*/
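These HA_ONLINE_* capability bits take over from the index_ddl_flags() scheme whose HA_DDL_* defines are deleted just below; the server is expected to test them against the mask an engine reports, e.g. through alter_table_flags(). A hedged sketch of such a capability check (the helper name is illustrative, not part of this patch):

    /* Sketch: can ADD INDEX run without blocking writes, given the mask
       returned by alter_table_flags()?  Engines that set an online bit
       are expected to set the matching *_NO_WRITES bit as well. */
    static bool can_add_index_online(uint caps, bool unique_index)
    {
      uint needed= unique_index ? HA_ONLINE_ADD_UNIQUE_INDEX
                                : HA_ONLINE_ADD_INDEX;
      return (caps & needed) != 0;
    }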
-#define HA_DDL_SUPPORT 1 /* Supported by handler */ -#define HA_DDL_WITH_LOCK 2 /* Can create/drop with locked table */ -#define HA_DDL_ONLINE 4 /* Can create/drop without lock */ +#define MAX_HA 15 /* Parameters for open() (in register form->filestat) HA_GET_INFO does an implicit HA_ABORT_IF_LOCKED */ @@ -174,7 +220,7 @@ /* Options of START TRANSACTION statement (and later of SET TRANSACTION stmt) */ #define MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT 1 -enum db_type +enum legacy_db_type { DB_TYPE_UNKNOWN=0,DB_TYPE_DIAB_ISAM=1, DB_TYPE_HASH,DB_TYPE_MISAM,DB_TYPE_PISAM, @@ -185,18 +231,38 @@ enum db_type DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, DB_TYPE_FEDERATED_DB, DB_TYPE_BLACKHOLE_DB, - DB_TYPE_DEFAULT // Must be last + DB_TYPE_PARTITION_DB, + DB_TYPE_BINLOG, + DB_TYPE_DEFAULT=127 // Must be last }; enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED, ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT }; +enum enum_binlog_func { + BFN_RESET_LOGS= 1, + BFN_RESET_SLAVE= 2, + BFN_BINLOG_WAIT= 3, + BFN_BINLOG_END= 4, + BFN_BINLOG_PURGE_FILE= 5 +}; + +enum enum_binlog_command { + LOGCOM_CREATE_TABLE, + LOGCOM_ALTER_TABLE, + LOGCOM_RENAME_TABLE, + LOGCOM_DROP_TABLE, + LOGCOM_CREATE_DB, + LOGCOM_ALTER_DB, + LOGCOM_DROP_DB +}; + /* struct to hold information about the table that should be created */ /* Bits in used_fields */ #define HA_CREATE_USED_AUTO (1L << 0) -#define HA_CREATE_USED_RAID (1L << 1) +#define HA_CREATE_USED_RAID (1L << 1) //RAID is no longer available #define HA_CREATE_USED_UNION (1L << 2) #define HA_CREATE_USED_INSERT_METHOD (1L << 3) #define HA_CREATE_USED_MIN_ROWS (1L << 4) @@ -225,6 +291,9 @@ typedef ulonglong my_xid; // this line is the same as in log_event.h #define MAXGTRIDSIZE 64 #define MAXBQUALSIZE 64 +#define COMPATIBLE_DATA_YES 0 +#define COMPATIBLE_DATA_NO 1 + struct xid_t { long formatID; long gtrid_length; @@ -301,6 +370,94 @@ typedef struct xid_t XID; #endif /* + These structures are used to pass information from a set of SQL commands + on add/drop/change tablespace definitions to the proper hton. 
+*/ +#define UNDEF_NODEGROUP 65535 +enum ts_command_type +{ + TS_CMD_NOT_DEFINED = -1, + CREATE_TABLESPACE = 0, + ALTER_TABLESPACE = 1, + CREATE_LOGFILE_GROUP = 2, + ALTER_LOGFILE_GROUP = 3, + DROP_TABLESPACE = 4, + DROP_LOGFILE_GROUP = 5, + CHANGE_FILE_TABLESPACE = 6, + ALTER_ACCESS_MODE_TABLESPACE = 7 +}; + +enum ts_alter_tablespace_type +{ + TS_ALTER_TABLESPACE_TYPE_NOT_DEFINED = -1, + ALTER_TABLESPACE_ADD_FILE = 1, + ALTER_TABLESPACE_DROP_FILE = 2 +}; + +enum tablespace_access_mode +{ + TS_NOT_DEFINED= -1, + TS_READ_ONLY = 0, + TS_READ_WRITE = 1, + TS_NOT_ACCESSIBLE = 2 +}; + +class st_alter_tablespace : public Sql_alloc +{ + public: + const char *tablespace_name; + const char *logfile_group_name; + enum ts_command_type ts_cmd_type; + enum ts_alter_tablespace_type ts_alter_tablespace_type; + const char *data_file_name; + const char *undo_file_name; + const char *redo_file_name; + ulonglong extent_size; + ulonglong undo_buffer_size; + ulonglong redo_buffer_size; + ulonglong initial_size; + ulonglong autoextend_size; + ulonglong max_size; + uint nodegroup_id; + enum legacy_db_type storage_engine; + bool wait_until_completed; + const char *ts_comment; + enum tablespace_access_mode ts_access_mode; + st_alter_tablespace() + { + tablespace_name= NULL; + logfile_group_name= "DEFAULT_LG"; //Default log file group + ts_cmd_type= TS_CMD_NOT_DEFINED; + data_file_name= NULL; + undo_file_name= NULL; + redo_file_name= NULL; + extent_size= 1024*1024; //Default 1 MByte + undo_buffer_size= 8*1024*1024; //Default 8 MByte + redo_buffer_size= 8*1024*1024; //Default 8 MByte + initial_size= 128*1024*1024; //Default 128 MByte + autoextend_size= 0; //No autoextension as default + max_size= 0; //Max size == initial size => no extension + storage_engine= DB_TYPE_UNKNOWN; + nodegroup_id= UNDEF_NODEGROUP; + wait_until_completed= TRUE; + ts_comment= NULL; + ts_access_mode= TS_NOT_DEFINED; + } +}; + +/* The handler for a table type. Will be included in the TABLE structure */ + +struct st_table; +typedef struct st_table TABLE; +typedef struct st_table_share TABLE_SHARE; +struct st_foreign_key_info; +typedef struct st_foreign_key_info FOREIGN_KEY_INFO; +typedef bool (stat_print_fn)(THD *thd, const char *type, uint type_len, + const char *file, uint file_len, + const char *status, uint status_len); +enum ha_stat_type { HA_ENGINE_STATUS, HA_ENGINE_LOGS, HA_ENGINE_MUTEX }; + +/* handlerton is a singleton structure - one instance per storage engine - to provide access to storage engine functionality that works on the "global" level (unlike handler class that works on a per-table basis) @@ -314,6 +471,14 @@ typedef struct xid_t XID; typedef struct { /* + handlerton structure version + */ + const int interface_version; +/* last version change: 0x0001 in 5.1.6 */ +#define MYSQL_HANDLERTON_INTERFACE_VERSION 0x0001 + + + /* storage engine name as it should be printed to a user */ const char *name; @@ -332,7 +497,7 @@ typedef struct Historical number used for frm file to determine the correct storage engine. This is going away and new engines will just use "name" for this. 
*/ - enum db_type db_type; + enum legacy_db_type db_type; /* Method that initizlizes a storage engine */ @@ -394,12 +559,34 @@ typedef struct void *(*create_cursor_read_view)(); void (*set_cursor_read_view)(void *); void (*close_cursor_read_view)(void *); + handler *(*create)(TABLE_SHARE *table); + void (*drop_database)(char* path); + int (*panic)(enum ha_panic_function flag); + int (*start_consistent_snapshot)(THD *thd); + bool (*flush_logs)(); + bool (*show_status)(THD *thd, stat_print_fn *print, enum ha_stat_type stat); + uint (*partition_flags)(); + uint (*alter_table_flags)(uint flags); + int (*alter_tablespace)(THD *thd, st_alter_tablespace *ts_info); + int (*fill_files_table)(THD *thd, + struct st_table_list *tables, + class Item *cond); uint32 flags; /* global handler flags */ + /* + Those handlerton functions below are properly initialized at handler + init. + */ + int (*binlog_func)(THD *thd, enum_binlog_func fn, void *arg); + void (*binlog_log_query)(THD *thd, enum_binlog_command binlog_command, + const char *query, uint query_length, + const char *db, const char *table_name); } handlerton; +extern const handlerton default_hton; + struct show_table_alias_st { const char *alias; - const char *type; + enum legacy_db_type type; }; /* Possible flags of a handlerton */ @@ -408,6 +595,9 @@ struct show_table_alias_st { #define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter #define HTON_CAN_RECREATE (1 << 2) //Delete all is used fro truncate #define HTON_HIDDEN (1 << 3) //Engine does not appear in lists +#define HTON_FLUSH_AFTER_RENAME (1 << 4) +#define HTON_NOT_USER_SELECTABLE (1 << 5) +#define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported typedef struct st_thd_trans { @@ -422,39 +612,59 @@ typedef struct st_thd_trans enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; + +enum ndb_distribution { ND_KEYHASH= 0, ND_LINHASH= 1 }; + + +typedef struct { + ulonglong data_file_length; + ulonglong max_data_file_length; + ulonglong index_file_length; + ulonglong delete_length; + ha_rows records; + ulong mean_rec_length; + time_t create_time; + time_t check_time; + time_t update_time; + ulonglong check_sum; +} PARTITION_INFO; + +#define UNDEF_NODEGROUP 65535 +class Item; + +class partition_info; + +struct st_partition_iter; +#define NOT_A_PARTITION_ID ((uint32)-1) + + + typedef struct st_ha_create_information { CHARSET_INFO *table_charset, *default_table_charset; LEX_STRING connect_string; - const char *comment,*password; + const char *comment,*password, *tablespace; const char *data_file_name, *index_file_name; const char *alias; ulonglong max_rows,min_rows; ulonglong auto_increment_value; ulong table_options; ulong avg_row_length; - ulong raid_chunksize; ulong used_fields; SQL_LIST merge_list; - enum db_type db_type; + handlerton *db_type; enum row_type row_type; uint null_bits; /* NULL bits at start of record */ uint options; /* OR of HA_CREATE_ options */ - uint raid_type,raid_chunks; uint merge_insert_method; uint extra_size; /* length of extra data segment */ bool table_existed; /* 1 in create if table existed */ bool frm_only; /* 1 if no ha_create_table() */ bool varchar; /* 1 if table has a VARCHAR */ + bool store_on_disk; /* 1 if table stored on disk */ } HA_CREATE_INFO; -/* The handler for a table type. 
Will be included in the TABLE structure */ - -struct st_table; -typedef struct st_table TABLE; -struct st_foreign_key_info; -typedef struct st_foreign_key_info FOREIGN_KEY_INFO; typedef struct st_savepoint SAVEPOINT; extern ulong savepoint_alloc_size; @@ -473,6 +683,7 @@ typedef struct st_ha_check_opt } HA_CHECK_OPT; + /* This is a buffer area that the handler can use to store rows. 'end_of_used_area' should be kept updated after calls to @@ -487,13 +698,22 @@ typedef struct st_handler_buffer byte *end_of_used_area; /* End of area that was used by handler */ } HANDLER_BUFFER; +typedef struct system_status_var SSV; +/* + The handler class is the interface for dynamically loadable + storage engines. Do not add ifdefs and take care when adding or + changing virtual functions to avoid vtable confusion + */ class handler :public Sql_alloc { + friend class ha_partition; + protected: - struct st_table *table; /* The table definition */ + struct st_table_share *table_share; /* The table definition */ + struct st_table *table; /* The current open table */ - virtual int index_init(uint idx) { active_index=idx; return 0; } + virtual int index_init(uint idx, bool sorted) { active_index=idx; return 0; } virtual int index_end() { active_index=MAX_KEY; return 0; } /* rnd_init() can be called two times without rnd_end() in between @@ -505,6 +725,11 @@ class handler :public Sql_alloc virtual int rnd_init(bool scan) =0; virtual int rnd_end() { return 0; } + void ha_statistic_increment(ulong SSV::*offset) const; + + +private: + virtual int reset() { return extra(HA_EXTRA_RESET); } public: const handlerton *ht; /* storage engine of this handler */ byte *ref; /* Pointer to current row */ @@ -517,7 +742,6 @@ public: ulonglong auto_increment_value; ha_rows records; /* Records in table */ ha_rows deleted; /* Deleted records */ - ulong raid_chunksize; ulong mean_rec_length; /* physical reclength */ time_t create_time; /* When table was created */ time_t check_time; @@ -541,38 +765,74 @@ public: /* Length of ref (1-8 or the clustered key length) */ uint ref_length; uint block_size; /* index block size */ - uint raid_type,raid_chunks; FT_INFO *ft_handler; enum {NONE=0, INDEX, RND} inited; bool auto_increment_column_changed; bool implicit_emptied; /* Can be !=0 only if HEAP */ const COND *pushed_cond; + MY_BITMAP *read_set; + MY_BITMAP *write_set; - handler(const handlerton *ht_arg, TABLE *table_arg) :table(table_arg), - ht(ht_arg), + handler(const handlerton *ht_arg, TABLE_SHARE *share_arg) + :table_share(share_arg), ht(ht_arg), ref(0), data_file_length(0), max_data_file_length(0), index_file_length(0), delete_length(0), auto_increment_value(0), records(0), deleted(0), mean_rec_length(0), create_time(0), check_time(0), update_time(0), key_used_on_scan(MAX_KEY), active_index(MAX_KEY), ref_length(sizeof(my_off_t)), block_size(0), - raid_type(0), ft_handler(0), inited(NONE), implicit_emptied(0), + ft_handler(0), inited(NONE), implicit_emptied(0), pushed_cond(NULL) {} - virtual ~handler(void) { /* TODO: DBUG_ASSERT(inited == NONE); */ } - int ha_open(const char *name, int mode, int test_if_locked); + virtual ~handler(void) + { + /* TODO: DBUG_ASSERT(inited == NONE); */ + } + /* + Check whether a handler allows to lock the table. + + SYNOPSIS + check_if_locking_is_allowed() + thd Handler of the thread, trying to lock the table + table Table handler to check + count Number of locks already granted to the table + + DESCRIPTION + Check whether a handler allows to lock the table. 
For instance, + MyISAM does not allow to lock mysql.proc along with other tables. + This limitation stems from the fact that MyISAM does not support + row-level locking and we have to add this limitation to avoid + deadlocks. + + RETURN + TRUE Locking is allowed + FALSE Locking is not allowed. The error was thrown. + */ + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_logger_thread) + { + return TRUE; + } + virtual int ha_initialise(); + int ha_open(TABLE *table, const char *name, int mode, int test_if_locked); bool update_auto_increment(); virtual void print_error(int error, myf errflag); virtual bool get_error_message(int error, String *buf); uint get_dup_key(int error); - void change_table_ptr(TABLE *table_arg) { table=table_arg; } + void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) + { + table= table_arg; + table_share= share; + } virtual double scan_time() { return ulonglong2double(data_file_length) / IO_SIZE + 2; } virtual double read_time(uint index, uint ranges, ha_rows rows) { return rows2double(ranges+rows); } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } virtual bool has_transactions(){ return 0;} - virtual uint extra_rec_buf_length() { return 0; } + virtual uint extra_rec_buf_length() const { return 0; } /* Return upper bound of current number of records in the table @@ -591,12 +851,12 @@ public: virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";} - int ha_index_init(uint idx) + int ha_index_init(uint idx, bool sorted) { DBUG_ENTER("ha_index_init"); DBUG_ASSERT(inited==NONE); inited=INDEX; - DBUG_RETURN(index_init(idx)); + DBUG_RETURN(index_init(idx, sorted)); } int ha_index_end() { @@ -619,19 +879,230 @@ public: inited=NONE; DBUG_RETURN(rnd_end()); } + int ha_reset() + { + DBUG_ENTER("ha_reset"); + ha_clear_all_set(); + DBUG_RETURN(reset()); + } + /* this is necessary in many places, e.g. in HANDLER command */ int ha_index_or_rnd_end() { return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0; } + /* + These are a set of routines used to enable handlers to only read/write + partial lists of the fields in the table. The bit vector is maintained + by the server part and is used by the handler at calls to read/write + data in the table. + It replaces the use of query id's for this purpose. The benefit is that + the handler can also set bits in the read/write set if it has special + needs and it is also easy for other parts of the server to interact + with the handler (e.g. the replication part for row-level logging). + The routines are all part of the general handler and are not possible + to override by a handler. A handler can however set/reset bits by + calling these routines. + + The methods ha_retrieve_all_cols and ha_retrieve_all_pk are made + virtual to handle InnoDB specifics. If InnoDB doesn't need the + extra parameters HA_EXTRA_RETRIEVE_ALL_COLS and + HA_EXTRA_RETRIEVE_PRIMARY_KEY anymore then these methods need not be + virtual anymore. 
+ */ + virtual int ha_retrieve_all_cols(); + virtual int ha_retrieve_all_pk(); + void ha_set_all_bits_in_read_set() + { + DBUG_ENTER("ha_set_all_bits_in_read_set"); + bitmap_set_all(read_set); + DBUG_VOID_RETURN; + } + void ha_set_all_bits_in_write_set() + { + DBUG_ENTER("ha_set_all_bits_in_write_set"); + bitmap_set_all(write_set); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_read_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_read_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(read_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_set_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_clear_bit_in_write_set(uint fieldnr) + { + DBUG_ENTER("ha_clear_bit_in_write_set"); + DBUG_PRINT("info", ("fieldnr = %d", fieldnr)); + bitmap_clear_bit(write_set, fieldnr); + DBUG_VOID_RETURN; + } + void ha_set_bit_in_rw_set(uint fieldnr, bool write_op) + { + DBUG_ENTER("ha_set_bit_in_rw_set"); + DBUG_PRINT("info", ("Set bit %u in read set", fieldnr)); + bitmap_set_bit(read_set, fieldnr); + if (!write_op) { + DBUG_VOID_RETURN; + } + else + { + DBUG_PRINT("info", ("Set bit %u in read and write set", fieldnr)); + bitmap_set_bit(write_set, fieldnr); + } + DBUG_VOID_RETURN; + } + bool ha_get_bit_in_read_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(read_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_read_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_bit_in_write_set(uint fieldnr) + { + bool bit_set=bitmap_is_set(write_set,fieldnr); + DBUG_ENTER("ha_get_bit_in_write_set"); + DBUG_PRINT("info", ("bit %u = %u", fieldnr, bit_set)); + DBUG_RETURN(bit_set); + } + bool ha_get_all_bit_in_read_set() + { + bool all_bits_set= bitmap_is_set_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_read_clear() + { + bool all_bits_set= bitmap_is_clear_all(read_set); + DBUG_ENTER("ha_get_all_bit_in_read_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_set() + { + bool all_bits_set= bitmap_is_set_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_set"); + DBUG_PRINT("info", ("all bits set = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + bool ha_get_all_bit_in_write_clear() + { + bool all_bits_set= bitmap_is_clear_all(write_set); + DBUG_ENTER("ha_get_all_bit_in_write_clear"); + DBUG_PRINT("info", ("all bits clear = %u", all_bits_set)); + DBUG_RETURN(all_bits_set); + } + void ha_set_primary_key_in_read_set(); + int ha_allocate_read_write_set(ulong no_fields); + void ha_clear_all_set(); uint get_index(void) const { return active_index; } virtual int open(const char *name, int mode, uint test_if_locked)=0; virtual int close(void)=0; - virtual int write_row(byte * buf) { return HA_ERR_WRONG_COMMAND; } - virtual int update_row(const byte * old_data, byte * new_data) - { return HA_ERR_WRONG_COMMAND; } - virtual int delete_row(const byte * buf) - { return HA_ERR_WRONG_COMMAND; } + virtual int ha_write_row(byte * buf); + virtual int ha_update_row(const byte * old_data, byte * new_data); + 
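The read_set/write_set helpers declared above let the server, and the engine itself, mark exactly which columns a statement touches, replacing the old per-field query-id bookkeeping. A small usage sketch built only from the methods declared in this class (the field number is illustrative; ha_clear_all_set() is assumed to empty both bitmaps, as its name suggests):

    /* Sketch: restrict a handler to the columns needed for a primary-key
       lookup that returns one extra column. */
    void prepare_pk_lookup(handler *file, uint wanted_fieldnr)
    {
      file->ha_clear_all_set();                     /* start from empty sets */
      file->ha_set_primary_key_in_read_set();       /* PK parts for the lookup */
      file->ha_set_bit_in_read_set(wanted_fieldnr); /* the column we return */
    }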
virtual int ha_delete_row(const byte * buf); + /* + If the handler does its own injection of the rows, this member function + should return 'true'. + */ + virtual bool is_injective() const { return false; } + + /* + SYNOPSIS + start_bulk_update() + RETURN + 0 Bulk update used by handler + 1 Bulk update not used, normal operation used + */ + virtual bool start_bulk_update() { return 1; } + /* + SYNOPSIS + start_bulk_delete() + RETURN + 0 Bulk delete used by handler + 1 Bulk delete not used, normal operation used + */ + virtual bool start_bulk_delete() { return 1; } + /* + SYNOPSIS + This method is similar to update_row; however, the handler doesn't need + to execute the updates at this point in time. The handler can be certain + that another call to bulk_update_row will occur OR a call to + exec_bulk_update before the set of updates in this query is concluded. + + bulk_update_row() + old_data Old record + new_data New record + dup_key_found Number of duplicate keys found + RETURN + 0 Success + >0 Error code + */ + virtual int bulk_update_row(const byte *old_data, byte *new_data, + uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + After this call all outstanding updates must be performed. The number + of duplicate key errors is reported in the dup_key_found parameter. + It is allowed to continue the batched update after this call; the + handler has to wait until end_bulk_update before changing state. + + exec_bulk_update() + dup_key_found Number of duplicate keys found + RETURN + 0 Success + >0 Error code + */ + virtual int exec_bulk_update(uint *dup_key_found) + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } + /* + SYNOPSIS + Perform any needed clean-up; no outstanding updates remain at this + point. + + end_bulk_update() + RETURN + Nothing + */ + virtual void end_bulk_update() { return; } + /* + SYNOPSIS + Execute all outstanding deletes and close down the bulk delete. + + end_bulk_delete() + RETURN + 0 Success + >0 Error code + */ + virtual int end_bulk_delete() + { + DBUG_ASSERT(FALSE); + return HA_ERR_WRONG_COMMAND; + } virtual int index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } @@ -678,12 +1149,33 @@ { return (ha_rows) 10; } virtual void position(const byte *record)=0; virtual void info(uint)=0; // see my_base.h for full description + virtual void get_dynamic_partition_info(PARTITION_INFO *stat_info, + uint part_id); virtual int extra(enum ha_extra_function operation) { return 0; } virtual int extra_opt(enum ha_extra_function operation, ulong cache_size) { return extra(operation); } - virtual int reset() { return extra(HA_EXTRA_RESET); } virtual int external_lock(THD *thd, int lock_type) { return 0; } + /* + In an UPDATE or DELETE, if the row under the cursor was locked by another + transaction, and the engine used an optimistic read of the last + committed row value under the cursor, then the engine returns 1 from this + function. MySQL must NOT try to update this optimistic value. If the + optimistic value does not match the WHERE condition, MySQL can decide to + skip over this row. Currently only works for InnoDB. This can be used to + avoid unnecessary lock waits. + + If this method returns nonzero, it will also signal the storage + engine that the next read will be a locking re-read of the row. 
+ */ + virtual bool was_semi_consistent_read() { return 0; } + /* + Tell the engine whether it should avoid unnecessary lock waits. + If yes, in an UPDATE or DELETE, if the row under the cursor was locked + by another transaction, the engine may try an optimistic read of + the last committed row value under the cursor. + */ + virtual void try_semi_consistent_read(bool) {} virtual void unlock_row() {} virtual int start_stmt(THD *thd, thr_lock_type lock_type) {return 0;} /* @@ -763,8 +1255,24 @@ public: virtual char *update_table_comment(const char * comment) { return (char*) comment;} virtual void append_create_info(String *packet) {} + /* + SYNOPSIS + is_fk_defined_on_table_or_index() + index Index to check if foreign key uses it + RETURN VALUE + TRUE Foreign key defined on table or index + FALSE No foreign key defined + DESCRIPTION + If index == MAX_KEY then a check for table is made and if index < + MAX_KEY then a check is made if the table has foreign keys and if + a foreign key uses this index (and thus the index cannot be dropped). + */ + virtual bool is_fk_defined_on_table_or_index(uint index) + { return FALSE; } virtual char* get_foreign_key_create_info() { return(NULL);} /* gets foreign key create string from InnoDB */ + virtual char* get_tablespace_name(THD *thd) + { return(NULL);} /* gets tablespace name from handler */ /* used in ALTER TABLE; 1 if changing storage engine is allowed */ virtual bool can_switch_engines() { return 1; } /* used in REPLACE; is > 0 if table is referred by a FOREIGN KEY */ @@ -778,12 +1286,25 @@ public: virtual const char *table_type() const =0; virtual const char **bas_ext() const =0; virtual ulong table_flags(void) const =0; + + virtual int get_default_no_partitions(ulonglong max_rows) { return 1;} + virtual void set_auto_partitions(partition_info *part_info) { return; } + virtual bool get_no_parts(const char *name, + uint *no_parts) + { + *no_parts= 0; + return 0; + } + virtual void set_part_info(partition_info *part_info) {return;} + virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0; - virtual ulong index_ddl_flags(KEY *wanted_index) const - { return (HA_DDL_SUPPORT); } + virtual int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys) { return (HA_ERR_WRONG_COMMAND); } - virtual int drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys) + virtual int prepare_drop_index(TABLE *table_arg, uint *key_num, + uint num_of_keys) + { return (HA_ERR_WRONG_COMMAND); } + virtual int final_drop_index(TABLE *table_arg) { return (HA_ERR_WRONG_COMMAND); } uint max_record_length() const @@ -815,8 +1336,30 @@ public: */ virtual int rename_table(const char *from, const char *to); virtual int delete_table(const char *name); + virtual void drop_table(const char *name); virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; + virtual int create_handler_files(const char *name) { return FALSE;} + + virtual int change_partitions(HA_CREATE_INFO *create_info, + const char *path, + ulonglong *copied, + ulonglong *deleted, + const void *pack_frm_data, + uint pack_frm_len) + { return HA_ERR_WRONG_COMMAND; } + virtual int drop_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } + virtual int rename_partitions(const char *path) + { return HA_ERR_WRONG_COMMAND; } + virtual int optimize_partitions(THD *thd) + { return HA_ERR_WRONG_COMMAND; } + virtual int analyze_partitions(THD *thd) + { return HA_ERR_WRONG_COMMAND; } + virtual int check_partitions(THD *thd) + { return HA_ERR_WRONG_COMMAND; } + virtual int 
repair_partitions(THD *thd) + { return HA_ERR_WRONG_COMMAND; } /* lock_count() can be more than one if the table is a MERGE */ virtual uint lock_count(void) const { return 1; } @@ -881,6 +1424,34 @@ public: Pops the top if condition stack, if stack is not empty */ virtual void cond_pop() { return; }; + virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, + uint table_changes) + { return COMPATIBLE_DATA_NO; } + +private: + + /* + Row-level primitives for storage engines. + These should be overridden by the storage engine class. To call + these methods, use the corresponding 'ha_*' method above. + */ + friend int ndb_add_binlog_index(THD *, void *); + + virtual int write_row(byte *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + virtual int update_row(const byte *old_data __attribute__((unused)), + byte *new_data __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } + + virtual int delete_row(const byte *buf __attribute__((unused))) + { + return HA_ERR_WRONG_COMMAND; + } }; /* Some extern variables used with handlers */ @@ -898,26 +1469,55 @@ extern ulong total_ha, total_ha_2pc; #define ha_rollback(thd) (ha_rollback_trans((thd), TRUE)) /* lookups */ -enum db_type ha_resolve_by_name(const char *name, uint namelen); -const char *ha_get_storage_engine(enum db_type db_type); -handler *get_new_handler(TABLE *table, MEM_ROOT *alloc, enum db_type db_type); -enum db_type ha_checktype(THD *thd, enum db_type database_type, +handlerton *ha_resolve_by_name(THD *thd, LEX_STRING *name); +handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type); +const char *ha_get_storage_engine(enum legacy_db_type db_type); +handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc, + handlerton *db_type); +handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type, bool no_substitute, bool report_error); -bool ha_check_storage_engine_flag(enum db_type db_type, uint32 flag); + + +static inline enum legacy_db_type ha_legacy_type(const handlerton *db_type) +{ + return (db_type == NULL) ? DB_TYPE_UNKNOWN : db_type->db_type; +} + +static inline const char *ha_resolve_storage_engine_name(const handlerton *db_type) +{ + return db_type == NULL ? "UNKNOWN" : db_type->name; +} + +static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag) +{ + return db_type == NULL ? FALSE : test(db_type->flags & flag); +} + +static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) +{ + return (db_type && db_type->create) ? 
+ (db_type->state == SHOW_OPTION_YES) : FALSE; +} /* basic stuff */ int ha_init(void); +int ha_register_builtin_plugins(); +int ha_initialize_handlerton(handlerton *hton); + TYPELIB *ha_known_exts(void); int ha_panic(enum ha_panic_function flag); -int ha_update_statistics(); void ha_close_connection(THD* thd); -my_bool ha_storage_engine_is_enabled(enum db_type database_type); -bool ha_flush_logs(void); +bool ha_flush_logs(handlerton *db_type); void ha_drop_database(char* path); -int ha_create_table(const char *name, HA_CREATE_INFO *create_info, +int ha_create_table(THD *thd, const char *path, + const char *db, const char *table_name, + HA_CREATE_INFO *create_info, bool update_create_info); -int ha_delete_table(THD *thd, enum db_type db_type, const char *path, - const char *alias, bool generate_warning); +int ha_delete_table(THD *thd, handlerton *db_type, const char *path, + const char *db, const char *alias, bool generate_warning); + +/* statistics and info */ +bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat); /* discovery */ int ha_create_table_from_engine(THD* thd, const char *db, const char *name); @@ -970,3 +1570,22 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht); int ha_repl_report_sent_binlog(THD *thd, char *log_file_name, my_off_t end_offset); int ha_repl_report_replication_stop(THD *thd); + +#ifdef HAVE_NDB_BINLOG +int ha_reset_logs(THD *thd); +int ha_binlog_index_purge_file(THD *thd, const char *file); +void ha_reset_slave(THD *thd); +void ha_binlog_log_query(THD *thd, const handlerton *db_type, + enum_binlog_command binlog_command, + const char *query, uint query_length, + const char *db, const char *table_name); +void ha_binlog_wait(THD *thd); +int ha_binlog_end(THD *thd); +#else +#define ha_reset_logs(a) 0 +#define ha_binlog_index_purge_file(a,b) 0 +#define ha_reset_slave(a) +#define ha_binlog_log_query(a,b,c,d,e,f,g); +#define ha_binlog_wait(a) +#define ha_binlog_end(a) 0 +#endif diff --git a/sql/handlerton-win.cc b/sql/handlerton-win.cc new file mode 100644 index 00000000000..9ce4eab2444 --- /dev/null +++ b/sql/handlerton-win.cc @@ -0,0 +1,72 @@ +#include "mysql_priv.h" + +extern handlerton heap_hton; +extern handlerton myisam_hton; +extern handlerton myisammrg_hton; +extern handlerton binlog_hton; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern handlerton innobase_hton; +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +extern handlerton berkeley_hton; +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE +extern handlerton example_hton; +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE +extern handlerton archive_hton; +#endif +#ifdef WITH_CSV_STORAGE_ENGINE +extern handlerton tina_hton; +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE +extern handlerton blackhole_hton; +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE +extern handlerton federated_hton; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +extern handlerton ndbcluster_hton; +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE +extern handlerton partition_hton; +#endif + +/* + This array is used for processing compiled in engines. 
+*/ +handlerton *sys_table_types[]= +{ + &heap_hton, + &myisam_hton, +#ifdef WITH_INNOBASE_STORAGE_ENGINE + &innobase_hton, +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE + &berkeley_hton, +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE + &example_hton, +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE + &archive_hton, +#endif +#ifdef WITH_CSV_STORAGE_ENGINE + &tina_hton, +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE + &blackhole_hton, +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE + &federated_hton, +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE + &ndbcluster_hton, +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE + &partition_hton, +#endif + &myisammrg_hton, + &binlog_hton, + NULL +}; diff --git a/sql/handlerton.cc.in b/sql/handlerton.cc.in new file mode 100644 index 00000000000..55af8cdd8cf --- /dev/null +++ b/sql/handlerton.cc.in @@ -0,0 +1,14 @@ + +#include "mysql_priv.h" + +extern handlerton heap_hton,myisam_hton,myisammrg_hton, + binlog_hton@mysql_se_decls@; + +/* + This array is used for processing compiled in engines. +*/ +handlerton *sys_table_types[]= +{ + &heap_hton,&myisam_hton@mysql_se_htons@,&myisammrg_hton,&binlog_hton,NULL +}; + diff --git a/sql/init.cc b/sql/init.cc index e53eeab8902..9f975296cb6 100644 --- a/sql/init.cc +++ b/sql/init.cc @@ -39,6 +39,7 @@ void unireg_init(ulong options) #endif VOID(strmov(reg_ext,".frm")); + reg_ext_length= 4; specialflag=SPECIAL_SAME_DB_NAME | options; /* Set options from argv */ /* Make a tab of powers of 10 */ for (i=0,nr=1.0; i < array_elements(log_10) ; i++) diff --git a/sql/item.cc b/sql/item.cc index 3d454969c1d..bbe7f3d2f9e 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1439,20 +1439,21 @@ bool agg_item_charsets(DTCollation &coll, const char *fname, Item_field::Item_field(Field *f) :Item_ident(0, NullS, *f->table_name, f->field_name), - item_equal(0), no_const_subst(0), + item_equal(0), no_const_subst(0), have_privileges(0), any_privileges(0) { set_field(f); /* - field_name and talbe_name should not point to garbage + field_name and table_name should not point to garbage if this item is to be reused */ orig_table_name= orig_field_name= ""; } + Item_field::Item_field(THD *thd, Name_resolution_context *context_arg, Field *f) - :Item_ident(context_arg, f->table->s->db, *f->table_name, f->field_name), + :Item_ident(context_arg, f->table->s->db.str, *f->table_name, f->field_name), item_equal(0), no_const_subst(0), have_privileges(0), any_privileges(0) { @@ -1519,7 +1520,7 @@ void Item_field::set_field(Field *field_par) max_length= field_par->max_length(); table_name= *field_par->table_name; field_name= field_par->field_name; - db_name= field_par->table->s->db; + db_name= field_par->table->s->db.str; alias_name_used= field_par->table->alias_name_used; unsigned_flag=test(field_par->flags & UNSIGNED_FLAG); collation.set(field_par->charset(), DERIVATION_IMPLICIT); @@ -3576,13 +3577,18 @@ bool Item_field::fix_fields(THD *thd, Item **reference) set_if_bigger(thd->lex->in_sum_func->max_arg_level, thd->lex->current_select->nest_level); } - else if (thd->set_query_id && field->query_id != thd->query_id) + else if (thd->set_query_id) { - /* We only come here in unions */ - TABLE *table=field->table; - field->query_id=thd->query_id; - table->used_fields++; - table->used_keys.intersect(field->part_of_key); + TABLE *table= field->table; + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); + if (field->query_id != thd->query_id) + { + /* We only come here in unions */ + field->query_id=thd->query_id; + table->used_fields++; + 
table->used_keys.intersect(field->part_of_key); + } } #ifndef NO_EMBEDDED_ACCESS_CHECKS if (any_privileges) @@ -3836,15 +3842,20 @@ enum_field_types Item::field_type() const Field *Item::make_string_field(TABLE *table) { + Field *field; DBUG_ASSERT(collation.collation); if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB) - return new Field_blob(max_length, maybe_null, name, table, + field= new Field_blob(max_length, maybe_null, name, collation.collation); - if (max_length > 0) - return new Field_varstring(max_length, maybe_null, name, table, + else if (max_length > 0) + field= new Field_varstring(max_length, maybe_null, name, table->s, collation.collation); - return new Field_string(max_length, maybe_null, name, table, - collation.collation); + else + field= new Field_string(max_length, maybe_null, name, + collation.collation); + if (field) + field->init(table); + return field; } @@ -3852,73 +3863,95 @@ Field *Item::make_string_field(TABLE *table) Create a field based on field_type of argument For now, this is only used to create a field for - IFNULL(x,something) + IFNULL(x,something) and time functions RETURN 0 error # Created field */ -Field *Item::tmp_table_field_from_field_type(TABLE *table) +Field *Item::tmp_table_field_from_field_type(TABLE *table, bool fixed_length) { /* The field functions defines a field to be not null if null_ptr is not 0 */ uchar *null_ptr= maybe_null ? (uchar*) "" : 0; + Field *field; switch (field_type()) { case MYSQL_TYPE_DECIMAL: case MYSQL_TYPE_NEWDECIMAL: - return new Field_new_decimal((char*) 0, max_length, null_ptr, 0, - Field::NONE, name, table, decimals, 0, + field= new Field_new_decimal((char*) 0, max_length, null_ptr, 0, + Field::NONE, name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_TINY: - return new Field_tiny((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_tiny((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_SHORT: - return new Field_short((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_short((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_LONG: - return new Field_long((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_long((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; #ifdef HAVE_LONG_LONG case MYSQL_TYPE_LONGLONG: - return new Field_longlong((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_longlong((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; #endif case MYSQL_TYPE_FLOAT: - return new Field_float((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, decimals, 0, unsigned_flag); + field= new Field_float((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_DOUBLE: - return new Field_double((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, decimals, 0, unsigned_flag); + field= new Field_double((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, decimals, 0, unsigned_flag); + break; case MYSQL_TYPE_NULL: - return new Field_null((char*) 0, max_length, Field::NONE, - name, table, &my_charset_bin); + field= new Field_null((char*) 0, max_length, Field::NONE, + name, &my_charset_bin); + break; case MYSQL_TYPE_INT24: - return new 
Field_medium((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table, 0, unsigned_flag); + field= new Field_medium((char*) 0, max_length, null_ptr, 0, Field::NONE, + name, 0, unsigned_flag); + break; case MYSQL_TYPE_NEWDATE: case MYSQL_TYPE_DATE: - return new Field_date(maybe_null, name, table, &my_charset_bin); + field= new Field_date(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_TIME: - return new Field_time(maybe_null, name, table, &my_charset_bin); + field= new Field_time(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_TIMESTAMP: case MYSQL_TYPE_DATETIME: - return new Field_datetime(maybe_null, name, table, &my_charset_bin); + field= new Field_datetime(maybe_null, name, &my_charset_bin); + break; case MYSQL_TYPE_YEAR: - return new Field_year((char*) 0, max_length, null_ptr, 0, Field::NONE, - name, table); + field= new Field_year((char*) 0, max_length, null_ptr, 0, Field::NONE, + name); + break; case MYSQL_TYPE_BIT: - return new Field_bit_as_char(NULL, max_length, null_ptr, 0, - Field::NONE, name, table); + field= new Field_bit_as_char(NULL, max_length, null_ptr, 0, + Field::NONE, name); + break; default: /* This case should never be chosen */ DBUG_ASSERT(0); /* If something goes awfully wrong, it's better to get a string than die */ + case MYSQL_TYPE_STRING: + if (fixed_length && max_length < CONVERT_IF_BIGGER_TO_BLOB) + { + field= new Field_string(max_length, maybe_null, name, + collation.collation); + break; + } + /* Fall through to make_string_field() */ case MYSQL_TYPE_ENUM: case MYSQL_TYPE_SET: - case MYSQL_TYPE_STRING: case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_VARCHAR: return make_string_field(table); @@ -3927,10 +3960,12 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table) case MYSQL_TYPE_LONG_BLOB: case MYSQL_TYPE_BLOB: case MYSQL_TYPE_GEOMETRY: - return new Field_blob(max_length, maybe_null, name, table, - collation.collation); + field= new Field_blob(max_length, maybe_null, name, collation.collation); break; // Blob handled outside of case } + if (field) + field->init(table); + return field; } @@ -5168,8 +5203,9 @@ bool Item_default_value::fix_fields(THD *thd, Item **items) if (!(def_field= (Field*) sql_alloc(field_arg->field->size_of()))) goto error; memcpy(def_field, field_arg->field, field_arg->field->size_of()); - def_field->move_field(def_field->table->s->default_values - - def_field->table->record[0]); + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->s->default_values - + def_field->table->record[0])); set_field(def_field); return FALSE; @@ -5271,16 +5307,22 @@ bool Item_insert_value::fix_fields(THD *thd, Item **items) if (!def_field) return TRUE; memcpy(def_field, field_arg->field, field_arg->field->size_of()); - def_field->move_field(def_field->table->insert_values - - def_field->table->record[0]); + def_field->move_field_offset((my_ptrdiff_t) + (def_field->table->insert_values - + def_field->table->record[0])); set_field(def_field); } else { Field *tmp_field= field_arg->field; /* charset doesn't matter here, it's to avoid sigsegv only */ - set_field(new Field_null(0, 0, Field::NONE, tmp_field->field_name, - tmp_field->table, &my_charset_bin)); + tmp_field= new Field_null(0, 0, Field::NONE, field_arg->field->field_name, + &my_charset_bin); + if (tmp_field) + { + tmp_field->init(field_arg->field->table); + set_field(tmp_field); + } } return FALSE; } @@ -5368,8 +5410,8 @@ bool Item_trigger_field::fix_fields(THD *thd, Item **items) table_grants->want_privilege= access_type == AT_READ ? 
SELECT_ACL : UPDATE_ACL; - if (check_grant_column(thd, table_grants, triggers->table->s->db, - triggers->table->s->table_name, field_name, + if (check_grant_column(thd, table_grants, triggers->table->s->db.str, + triggers->table->s->table_name.str, field_name, strlen(field_name), thd->security_ctx)) return TRUE; } @@ -6096,24 +6138,31 @@ Field *Item_type_holder::make_field_by_type(TABLE *table) The field functions defines a field to be not null if null_ptr is not 0 */ uchar *null_ptr= maybe_null ? (uchar*) "" : 0; - switch (fld_type) - { + Field *field; + + switch (fld_type) { case MYSQL_TYPE_ENUM: DBUG_ASSERT(enum_set_typelib); - return new Field_enum((char *) 0, max_length, null_ptr, 0, + field= new Field_enum((char *) 0, max_length, null_ptr, 0, Field::NONE, name, - table, get_enum_pack_length(enum_set_typelib->count), + get_enum_pack_length(enum_set_typelib->count), enum_set_typelib, collation.collation); + if (field) + field->init(table); + return field; case MYSQL_TYPE_SET: DBUG_ASSERT(enum_set_typelib); - return new Field_set((char *) 0, max_length, null_ptr, 0, + field= new Field_set((char *) 0, max_length, null_ptr, 0, Field::NONE, name, - table, get_set_pack_length(enum_set_typelib->count), + get_set_pack_length(enum_set_typelib->count), enum_set_typelib, collation.collation); + if (field) + field->init(table); + return field; default: break; } - return tmp_table_field_from_field_type(table); + return tmp_table_field_from_field_type(table, 0); } diff --git a/sql/item.h b/sql/item.h index 2e3e0acc408..b9ce418858c 100644 --- a/sql/item.h +++ b/sql/item.h @@ -370,6 +370,35 @@ public: } }; + +/* + This enum is used to report information about monotonicity of a function + represented by an Item* tree. + Monotonicity is defined only for Item* trees that represent table + partitioning expressions (i.e. have no subselects/user vars/PS parameters + etc etc). An Item* tree is assumed to have the same monotonicity properties + as its corresponding function F: + + [signed] longlong F(field1, field2, ...) { + put values of field_i into table record buffer; + return item->val_int(); + } + + NOTE + At the moment function monotonicity is not well defined (and so may be + incorrect) for Item trees with parameters/return types that are different + from INT_RESULT, may be NULL, or are unsigned. + It will be possible to address this issue once the related partitioning bugs + (BUG#16002, BUG#15447, BUG#13436) are fixed. +*/ + +typedef enum monotonicity_info +{ + NON_MONOTONIC, /* none of the below holds */ + MONOTONIC_INCREASING, /* F() is unary and (x < y) => (F(x) <= F(y)) */ + MONOTONIC_STRICT_INCREASING /* F() is unary and (x < y) => (F(x) < F(y)) */ +} enum_monotonicity_info; + /*************************************************************************/ typedef bool (Item::*Item_processor)(byte *arg); @@ -395,6 +424,7 @@ public: FIELD_VARIANCE_ITEM, INSERT_VALUE_ITEM, SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER, PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM, + XPATH_NODESET, XPATH_NODESET_CMP, VIEW_FIXER_ITEM}; enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE }; @@ -467,6 +497,15 @@ public: virtual Item_result cast_to_int_type() const { return result_type(); } virtual enum_field_types field_type() const; virtual enum Type type() const =0; + + /* + Return information about function monotonicity. See comment for + enum_monotonicity_info for details. This function can only be called + after fix_fields() call.
+ */ + virtual enum_monotonicity_info get_monotonicity_info() const + { return NON_MONOTONIC; } + /* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */ /* Return double precision floating point representation of item. @@ -557,6 +596,7 @@ public: TRUE value is true (not equal to 0) */ virtual bool val_bool(); + virtual String *val_nodeset(String*) { return 0; } /* Helper functions, see item_sum.cc */ String *val_string_from_real(String *str); String *val_string_from_int(String *str); @@ -731,7 +771,7 @@ public: // used in row subselects to get value of elements virtual void bring_value() {} - Field *tmp_table_field_from_field_type(TABLE *table); + Field *tmp_table_field_from_field_type(TABLE *table, bool fixed_length); virtual Item_field *filed_for_view_update() { return 0; } virtual Item *neg_transformer(THD *thd) { return NULL; } @@ -1141,6 +1181,10 @@ public: { return field->type(); } + enum_monotonicity_info get_monotonicity_info() const + { + return MONOTONIC_STRICT_INCREASING; + } Field *get_tmp_table_field() { return result_field; } Field *tmp_table_field(TABLE *t_arg) { return result_field; } bool get_date(TIME *ltime,uint fuzzydate); @@ -1899,7 +1943,7 @@ public: virtual Item *real_item() { return ref; } }; - +#ifdef MYSQL_SERVER #include "gstream.h" #include "spatial.h" #include "item_sum.h" @@ -1911,6 +1955,8 @@ public: #include "item_timefunc.h" #include "item_uniq.h" #include "item_subselect.h" +#include "item_xmlfunc.h" +#endif class Item_copy_string :public Item { diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 7ba8a536ac7..592b7e9dc6c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -1233,7 +1233,7 @@ enum_field_types Item_func_ifnull::field_type() const Field *Item_func_ifnull::tmp_table_field(TABLE *table) { - return tmp_table_field_from_field_type(table); + return tmp_table_field_from_field_type(table, 0); } double @@ -2862,7 +2862,7 @@ longlong Item_is_not_null_test::val_int() } if (args[0]->is_null()) { - DBUG_PRINT("info", ("null")) + DBUG_PRINT("info", ("null")); owner->was_null|= 1; DBUG_RETURN(0); } diff --git a/sql/item_create.cc b/sql/item_create.cc index 342ef245a76..17f1fbca471 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -462,7 +462,6 @@ Item *create_func_cast(Item *a, Cast_target cast_type, int len, int dec, CHARSET_INFO *cs) { Item *res; - LINT_INIT(res); switch (cast_type) { case ITEM_CAST_BINARY: res= new Item_func_binary(a); break; @@ -478,6 +477,10 @@ Item *create_func_cast(Item *a, Cast_target cast_type, int len, int dec, res= new Item_char_typecast(a, len, cs ? 
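The monotonicity contract added above is easiest to see from a caller's perspective. A hypothetical consumer, e.g. the partition pruning that the enum's comment alludes to, could dispatch on it like this (an illustrative sketch only; the function below is not part of this change):

bool can_prune_by_range_sketch(Item *part_expr)
{
  /* Only valid after fix_fields(), per the contract above */
  switch (part_expr->get_monotonicity_info()) {
  case MONOTONIC_STRICT_INCREASING:
    return true;   /* x < y => F(x) < F(y): interval endpoints map exactly */
  case MONOTONIC_INCREASING:
    return true;   /* endpoints map, but distinct x may yield equal F(x) */
  case NON_MONOTONIC:
  default:
    return false;  /* no interval mapping; all partitions must be scanned */
  }
}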
cs : current_thd->variables.collation_connection); break; + default: + DBUG_ASSERT(0); + res= 0; + break; } return res; } @@ -499,6 +502,16 @@ Item *create_func_quote(Item* a) return new Item_func_quote(a); } +Item *create_func_xml_extractvalue(Item *a, Item *b) +{ + return new Item_func_xml_extractvalue(a, b); +} + +Item *create_func_xml_update(Item *a, Item *b, Item *c) +{ + return new Item_func_xml_update(a, b, c); +} + #ifdef HAVE_SPATIAL Item *create_func_as_wkt(Item *a) { diff --git a/sql/item_create.h b/sql/item_create.h index 35db9be3c89..c76dc6b9ad7 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -102,7 +102,8 @@ Item *create_load_file(Item* a); Item *create_func_is_free_lock(Item* a); Item *create_func_is_used_lock(Item* a); Item *create_func_quote(Item* a); - +Item *create_func_xml_extractvalue(Item *a, Item *b); +Item *create_func_xml_update(Item *a, Item *b, Item *c); #ifdef HAVE_SPATIAL Item *create_func_geometry_from_text(Item *a); diff --git a/sql/item_func.cc b/sql/item_func.cc index a85f05c2e22..d2e0911557f 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -362,41 +362,43 @@ bool Item_func::eq(const Item *item, bool binary_cmp) const } -Field *Item_func::tmp_table_field(TABLE *t_arg) +Field *Item_func::tmp_table_field(TABLE *table) { - Field *res; - LINT_INIT(res); + Field *field; + LINT_INIT(field); switch (result_type()) { case INT_RESULT: if (max_length > 11) - res= new Field_longlong(max_length, maybe_null, name, t_arg, - unsigned_flag); + field= new Field_longlong(max_length, maybe_null, name, unsigned_flag); else - res= new Field_long(max_length, maybe_null, name, t_arg, - unsigned_flag); + field= new Field_long(max_length, maybe_null, name, unsigned_flag); break; case REAL_RESULT: - res= new Field_double(max_length, maybe_null, name, t_arg, decimals); + field= new Field_double(max_length, maybe_null, name, decimals); break; case STRING_RESULT: - res= make_string_field(t_arg); + return make_string_field(table); break; case DECIMAL_RESULT: - res= new Field_new_decimal(my_decimal_precision_to_length(decimal_precision(), - decimals, - unsigned_flag), - maybe_null, name, t_arg, decimals, unsigned_flag); + field= new Field_new_decimal(my_decimal_precision_to_length(decimal_precision(), + decimals, + unsigned_flag), + maybe_null, name, decimals, unsigned_flag); break; case ROW_RESULT: default: // This case should never be chosen DBUG_ASSERT(0); + field= 0; break; } - return res; + if (field) + field->init(table); + return field; } + my_decimal *Item_func::val_decimal(my_decimal *decimal_value) { DBUG_ASSERT(fixed); @@ -895,7 +897,7 @@ String *Item_decimal_typecast::val_str(String *str) my_decimal tmp_buf, *tmp= val_decimal(&tmp_buf); if (null_value) return NULL; - my_decimal2string(E_DEC_FATAL_ERROR, tmp, 0, 0, 0, str); + my_decimal2string(E_DEC_FATAL_ERROR, &tmp_buf, 0, 0, 0, str); return str; } @@ -2655,6 +2657,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func, u_d->name.str, ER(ER_UNKNOWN_ERROR)); DBUG_RETURN(TRUE); } + thd->set_current_stmt_binlog_row_based_if_mixed(); DBUG_RETURN(FALSE); } @@ -4646,7 +4649,8 @@ Item_func_sp::Item_func_sp(Name_resolution_context *context_arg, sp_name *name) { maybe_null= 1; m_name->init_qname(current_thd); - dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)); + dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)+ sizeof(TABLE_SHARE)); + dummy_table->s= (TABLE_SHARE*) (dummy_table+1); } @@ -4657,9 +4661,11 @@ Item_func_sp::Item_func_sp(Name_resolution_context *context_arg, { maybe_null= 1; 
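A pattern worth calling out, since it recurs in nearly every Item/Field hunk above: Field constructors no longer receive a TABLE*, and callers instead attach the freshly built field with Field::init(table). Condensed to its essentials (a sketch; the helper name is invented, the constructor signature is the one used in the hunks above):

Field *make_double_field_sketch(TABLE *table, uint32 len, bool maybe_null,
                                const char *name, uint8 dec)
{
  /* Phase 1: construct the field detached from any table */
  Field *field= new Field_double(len, maybe_null, name, dec);
  /* Phase 2: bind it to the table; call sites above always check for 0 first */
  if (field)
    field->init(table);
  return field;
}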
m_name->init_qname(current_thd); - dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)); + dummy_table= (TABLE*) sql_calloc(sizeof(TABLE)+ sizeof(TABLE_SHARE)); + dummy_table->s= (TABLE_SHARE*) (dummy_table+1); } + void Item_func_sp::cleanup() { @@ -4714,16 +4720,15 @@ Item_func_sp::sp_result_field(void) const DBUG_RETURN(0); } } - if (!dummy_table->s) + if (!dummy_table->alias) { char *empty_name= (char *) ""; - TABLE_SHARE *share; - dummy_table->s= share= &dummy_table->share_not_to_be_used; - dummy_table->alias = empty_name; - dummy_table->maybe_null = maybe_null; + dummy_table->alias= empty_name; + dummy_table->maybe_null= maybe_null; dummy_table->in_use= current_thd; - share->table_cache_key = empty_name; - share->table_name = empty_name; + dummy_table->s->table_cache_key.str = empty_name; + dummy_table->s->table_name.str= empty_name; + dummy_table->s->db.str= empty_name; } field= m_sp->create_result_field(max_length, name, dummy_table); DBUG_RETURN(field); @@ -4751,13 +4756,8 @@ Item_func_sp::execute(Field **flp) if (!(f= *flp)) { - if (!(*flp= f= sp_result_field())) - { - my_message(ER_OUT_OF_RESOURCES, ER(ER_OUT_OF_RESOURCES), MYF(0)); - return 0; - } - - f->move_field((f->pack_length() > sizeof(result_buf)) ? + *flp= f= sp_result_field(); + f->move_field((f->pack_length() > sizeof(result_buf)) ? sql_alloc(f->pack_length()) : result_buf); f->null_ptr= (uchar *)&null_value; f->null_bit= 1; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index a3e47154bc3..eb89eb7708c 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -3002,6 +3002,7 @@ String *Item_func_uuid::val_str(String *str) char *s; THD *thd= current_thd; + thd->set_current_stmt_binlog_row_based_if_mixed(); pthread_mutex_lock(&LOCK_uuid_generator); if (! uuid_time) /* first UUID() call. 
initializing data */ { diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 88620634354..68f189ccf8c 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -133,6 +133,7 @@ Item_subselect::select_transformer(JOIN *join) bool Item_subselect::fix_fields(THD *thd_param, Item **ref) { char const *save_where= thd_param->where; + uint8 uncacheable; bool res; DBUG_ASSERT(fixed == 0); @@ -178,15 +179,17 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) fix_length_and_dec(); } else - return 1; - uint8 uncacheable= engine->uncacheable(); - if (uncacheable) + goto err; + + if ((uncacheable= engine->uncacheable())) { const_item_cache= 0; if (uncacheable & UNCACHEABLE_RAND) used_tables_cache|= RAND_TABLE_BIT; } fixed= 1; + +err: thd->where= save_where; return res; } @@ -1613,7 +1616,7 @@ int subselect_uniquesubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 0); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); @@ -1666,7 +1669,7 @@ int subselect_indexsubquery_engine::exec() } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); error= table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); @@ -1801,7 +1804,7 @@ void subselect_uniquesubquery_engine::print(String *str) str->append(STRING_WITH_LEN("<primary_index_lookup>(")); tab->ref.items[0]->print(str); str->append(STRING_WITH_LEN(" in ")); - str->append(tab->table->s->table_name); + str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); KEY *key_info= tab->table->key_info+ tab->ref.key; str->append(STRING_WITH_LEN(" on ")); str->append(key_info->name); @@ -1819,7 +1822,7 @@ void subselect_indexsubquery_engine::print(String *str) str->append(STRING_WITH_LEN("<index_lookup>(")); tab->ref.items[0]->print(str); str->append(STRING_WITH_LEN(" in ")); - str->append(tab->table->s->table_name); + str->append(tab->table->s->table_name.str, tab->table->s->table_name.length); KEY *key_info= tab->table->key_info+ tab->ref.key; str->append(STRING_WITH_LEN(" on ")); str->append(key_info->name); diff --git a/sql/item_sum.cc b/sql/item_sum.cc index a3a25ec8d6f..dc1cf6cc8b7 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -371,26 +371,33 @@ bool Item_sum::walk (Item_processor processor, byte *argument) Field *Item_sum::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { + Field *field; switch (result_type()) { case REAL_RESULT: - return new Field_double(max_length,maybe_null,name,table,decimals); + field= new Field_double(max_length, maybe_null, name, decimals); + break; case INT_RESULT: - return new Field_longlong(max_length,maybe_null,name,table,unsigned_flag); + field= new Field_longlong(max_length, maybe_null, name, unsigned_flag); + break; case STRING_RESULT: - if (max_length > 255 && convert_blob_length) - return new Field_varstring(convert_blob_length, maybe_null, - name, table, - collation.collation); - return make_string_field(table); + if (max_length <= 255 || !convert_blob_length) + return make_string_field(table); + field= new Field_varstring(convert_blob_length, maybe_null, + name, table->s, collation.collation); + break; case DECIMAL_RESULT: - return new Field_new_decimal(max_length, maybe_null, name, table, + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); + break; case ROW_RESULT: 
default: // This case should never be chosen DBUG_ASSERT(0); return 0; } + if (field) + field->init(table); + return field; } @@ -538,9 +545,10 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref) Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { + Field *field; if (args[0]->type() == Item::FIELD_ITEM) { - Field *field= ((Item_field*) args[0])->field; + field= ((Item_field*) args[0])->field; if ((field= create_tmp_field_from_field(current_thd, field, name, table, NULL, convert_blob_length))) @@ -554,16 +562,21 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table, */ switch (args[0]->field_type()) { case MYSQL_TYPE_DATE: - return new Field_date(maybe_null, name, table, collation.collation); + field= new Field_date(maybe_null, name, collation.collation); + break; case MYSQL_TYPE_TIME: - return new Field_time(maybe_null, name, table, collation.collation); + field= new Field_time(maybe_null, name, collation.collation); + break; case MYSQL_TYPE_TIMESTAMP: case MYSQL_TYPE_DATETIME: - return new Field_datetime(maybe_null, name, table, collation.collation); - default: + field= new Field_datetime(maybe_null, name, collation.collation); break; + default: + return Item_sum::create_tmp_field(group, table, convert_blob_length); } - return Item_sum::create_tmp_field(group, table, convert_blob_length); + if (field) + field->init(table); + return field; } @@ -1065,6 +1078,7 @@ Item *Item_sum_avg::copy_or_same(THD* thd) Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, uint convert_blob_len) { + Field *field; if (group) { /* @@ -1072,14 +1086,18 @@ Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, The easiest way to do this is to store both values in a string and unpack on access. */ - return new Field_string(((hybrid_type == DECIMAL_RESULT) ? + field= new Field_string(((hybrid_type == DECIMAL_RESULT) ? dec_bin_size : sizeof(double)) + sizeof(longlong), - 0, name, table, &my_charset_bin); + 0, name, &my_charset_bin); } - if (hybrid_type == DECIMAL_RESULT) - return new Field_new_decimal(max_length, maybe_null, name, table, + else if (hybrid_type == DECIMAL_RESULT) + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); - return new Field_double(max_length, maybe_null, name, table, decimals); + else + field= new Field_double(max_length, maybe_null, name, decimals); + if (field) + field->init(table); + return field; } @@ -1244,6 +1262,7 @@ Item *Item_sum_variance::copy_or_same(THD* thd) Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, uint convert_blob_len) { + Field *field; if (group) { /* @@ -1251,15 +1270,19 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, The easiest way to do this is to store both values in a string and unpack on access. */ - return new Field_string(((hybrid_type == DECIMAL_RESULT) ?
dec_bin_size0 + dec_bin_size1 : sizeof(double)*2) + sizeof(longlong), - 0, name, table, &my_charset_bin); + 0, name, &my_charset_bin); } - if (hybrid_type == DECIMAL_RESULT) - return new Field_new_decimal(max_length, maybe_null, name, table, + else if (hybrid_type == DECIMAL_RESULT) + field= new Field_new_decimal(max_length, maybe_null, name, decimals, unsigned_flag); - return new Field_double(max_length, maybe_null,name,table,decimals); + else + field= new Field_double(max_length, maybe_null, name, decimals); + if (field) + field->init(table); + return field; } @@ -2524,7 +2547,7 @@ bool Item_sum_count_distinct::setup(THD *thd) table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows table->no_rows=1; - if (table->s->db_type == DB_TYPE_HEAP) + if (table->s->db_type == &heap_hton) { /* No blobs, otherwise it would have been MyISAM: set up a compare @@ -2639,7 +2662,7 @@ bool Item_sum_count_distinct::add() */ return tree->unique_add(table->record[0] + table->s->null_bytes); } - if ((error= table->file->write_row(table->record[0])) && + if ((error= table->file->ha_write_row(table->record[0])) && error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) return TRUE; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 63a7f1f130b..c4f7dec70d0 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -885,6 +885,34 @@ longlong Item_func_to_days::val_int() return (longlong) calc_daynr(ltime.year,ltime.month,ltime.day); } + +/* + Get information about this Item tree monotonicity + + SYNOPSIS + Item_func_to_days::get_monotonicity_info() + + DESCRIPTION + Get information about monotonicity of the function represented by this item + tree. + + RETURN + See enum_monotonicity_info. +*/ + +enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM) + { + if (args[0]->field_type() == MYSQL_TYPE_DATE) + return MONOTONIC_STRICT_INCREASING; + if (args[0]->field_type() == MYSQL_TYPE_DATETIME) + return MONOTONIC_INCREASING; + } + return NON_MONOTONIC; +} + + longlong Item_func_dayofyear::val_int() { DBUG_ASSERT(fixed == 1); @@ -1068,6 +1096,29 @@ longlong Item_func_year::val_int() } + +/* + Get information about this Item tree monotonicity + + SYNOPSIS + Item_func_year::get_monotonicity_info() + + DESCRIPTION + Get information about monotonicity of the function represented by this item + tree. + + RETURN + See enum_monotonicity_info. +*/ + +enum_monotonicity_info Item_func_year::get_monotonicity_info() const +{ + if (args[0]->type() == Item::FIELD_ITEM && + (args[0]->field_type() == MYSQL_TYPE_DATE || + args[0]->field_type() == MYSQL_TYPE_DATETIME)) + return MONOTONIC_INCREASING; + return NON_MONOTONIC; +} + longlong Item_func_unix_timestamp::val_int() { TIME ltime; @@ -1114,7 +1165,7 @@ longlong Item_func_time_to_sec::val_int() To make code easy, allow interval objects without separators. */ -static bool get_interval_value(Item *args,interval_type int_type, +bool get_interval_value(Item *args,interval_type int_type, String *str_value, INTERVAL *interval) { ulonglong array[5]; @@ -1980,110 +2031,15 @@ bool Item_date_add_interval::get_date(TIME *ltime, uint fuzzy_date) long period,sign; INTERVAL interval; - ltime->neg= 0; if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE) || get_interval_value(args[1],int_type,&value,&interval)) goto null_date; - sign= (interval.neg ?
-1 : 1); + if (date_sub_interval) - sign = -sign; + interval.neg = !interval.neg; - null_value=0; - switch (int_type) { - case INTERVAL_SECOND: - case INTERVAL_SECOND_MICROSECOND: - case INTERVAL_MICROSECOND: - case INTERVAL_MINUTE: - case INTERVAL_HOUR: - case INTERVAL_MINUTE_MICROSECOND: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_HOUR_MICROSECOND: - case INTERVAL_HOUR_SECOND: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_DAY_MICROSECOND: - case INTERVAL_DAY_SECOND: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_HOUR: - { - longlong sec, days, daynr, microseconds, extra_sec; - ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date - microseconds= ltime->second_part + sign*interval.second_part; - extra_sec= microseconds/1000000L; - microseconds= microseconds%1000000L; - - sec=((ltime->day-1)*3600*24L+ltime->hour*3600+ltime->minute*60+ - ltime->second + - sign* (longlong) (interval.day*3600*24L + - interval.hour*LL(3600)+interval.minute*LL(60)+ - interval.second))+ extra_sec; - if (microseconds < 0) - { - microseconds+= LL(1000000); - sec--; - } - days= sec/(3600*LL(24)); - sec-= days*3600*LL(24); - if (sec < 0) - { - days--; - sec+= 3600*LL(24); - } - ltime->second_part= (uint) microseconds; - ltime->second= (uint) (sec % 60); - ltime->minute= (uint) (sec/60 % 60); - ltime->hour= (uint) (sec/3600); - daynr= calc_daynr(ltime->year,ltime->month,1) + days; - /* Day number from year 0 to 9999-12-31 */ - if ((ulonglong) daynr >= MAX_DAY_NUMBER) - goto invalid_date; - get_date_from_daynr((long) daynr, <ime->year, <ime->month, - <ime->day); - break; - } - case INTERVAL_DAY: - case INTERVAL_WEEK: - period= (calc_daynr(ltime->year,ltime->month,ltime->day) + - sign * (long) interval.day); - /* Daynumber from year 0 to 9999-12-31 */ - if ((ulong) period >= MAX_DAY_NUMBER) - goto invalid_date; - get_date_from_daynr((long) period,<ime->year,<ime->month,<ime->day); - break; - case INTERVAL_YEAR: - ltime->year+= sign * (long) interval.year; - if ((ulong) ltime->year >= 10000L) - goto invalid_date; - if (ltime->month == 2 && ltime->day == 29 && - calc_days_in_year(ltime->year) != 366) - ltime->day=28; // Was leap-year - break; - case INTERVAL_YEAR_MONTH: - case INTERVAL_QUARTER: - case INTERVAL_MONTH: - period= (ltime->year*12 + sign * (long) interval.year*12 + - ltime->month-1 + sign * (long) interval.month); - if ((ulong) period >= 120000L) - goto invalid_date; - ltime->year= (uint) (period / 12); - ltime->month= (uint) (period % 12L)+1; - /* Adjust day if the new month doesn't have enough days */ - if (ltime->day > days_in_month[ltime->month-1]) - { - ltime->day = days_in_month[ltime->month-1]; - if (ltime->month == 2 && calc_days_in_year(ltime->year) == 366) - ltime->day++; // Leap-year - } - break; - default: - goto null_date; - } - return 0; // Ok + return (null_value= date_add_interval(ltime, int_type, interval)); -invalid_date: - push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_DATETIME_FUNCTION_OVERFLOW, - ER(ER_DATETIME_FUNCTION_OVERFLOW), - "datetime"); null_date: return (null_value=1); } @@ -3009,18 +2965,6 @@ get_date_time_result_type(const char *format, uint length) } -Field *Item_func_str_to_date::tmp_table_field(TABLE *t_arg) -{ - if (cached_field_type == MYSQL_TYPE_TIME) - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); - if (cached_field_type == MYSQL_TYPE_DATE) - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); - if (cached_field_type == MYSQL_TYPE_DATETIME) - return (new Field_datetime(maybe_null, name, t_arg, 
&my_charset_bin)); - return (new Field_string(max_length, maybe_null, name, t_arg, &my_charset_bin)); -} - - void Item_func_str_to_date::fix_length_and_dec() { char format_buff[64]; diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 71f595184ec..fd2f3945fca 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -26,6 +26,9 @@ enum date_time_format_types TIME_ONLY= 0, TIME_MICROSECOND, DATE_ONLY, DATE_TIME, DATE_TIME_MICROSECOND }; +bool get_interval_value(Item *args,interval_type int_type, + String *str_value, INTERVAL *interval); + class Item_func_period_add :public Item_int_func { public: @@ -65,6 +68,7 @@ public: max_length=6*MY_CHARSET_BIN_MB_MAXLEN; maybe_null=1; } + enum_monotonicity_info get_monotonicity_info() const; }; @@ -234,6 +238,7 @@ public: Item_func_year(Item *a) :Item_int_func(a) {} longlong val_int(); const char *func_name() const { return "year"; } + enum_monotonicity_info get_monotonicity_info() const; void fix_length_and_dec() { decimals=0; @@ -340,10 +345,10 @@ public: max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } int save_in_field(Field *to, bool no_conversions); - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); - } + return tmp_table_field_from_field_type(table, 0); + } }; @@ -355,9 +360,9 @@ public: Item_date_func(Item *a,Item *b) :Item_str_func(a,b) {} Item_date_func(Item *a,Item *b, Item *c) :Item_str_func(a,b,c) {} enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -378,9 +383,9 @@ public: longlong val_int() { DBUG_ASSERT(fixed == 1); return value; } String *val_str(String *str); void fix_length_and_dec(); - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } /* Abstract method that defines which time zone is used for conversion. @@ -618,26 +623,12 @@ public: } enum_field_types field_type() const { return MYSQL_TYPE_TIME; } const char *func_name() const { return "sec_to_time"; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; -/* - The following must be sorted so that simple intervals comes first. 
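Stepping back: the get_date() hunk above replaces roughly a hundred lines of inline date arithmetic with a single call to date_add_interval(). Read together with the newly exported get_interval_value(), the whole flow reduces to the sketch below (signatures as they appear in this diff; date_add_interval()'s nonzero-on-error convention is inferred from the call site, so treat it as an assumption):

bool add_interval_sketch(Item **args, interval_type int_type,
                         bool date_sub_interval, TIME *ltime)
{
  String value;
  INTERVAL interval;

  if (args[0]->get_date(ltime, TIME_NO_ZERO_DATE) ||
      get_interval_value(args[1], int_type, &value, &interval))
    return true;                    /* NULL result */
  if (date_sub_interval)
    interval.neg= !interval.neg;    /* DATE_SUB is DATE_ADD with the sign flipped */
  return date_add_interval(ltime, int_type, interval) != 0;  /* assumed: nonzero on overflow */
}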
- (get_interval_value() depends on this) -*/ - -enum interval_type -{ - INTERVAL_YEAR, INTERVAL_QUARTER, INTERVAL_MONTH, INTERVAL_DAY, INTERVAL_HOUR, - INTERVAL_MINUTE, INTERVAL_WEEK, INTERVAL_SECOND, INTERVAL_MICROSECOND , - INTERVAL_YEAR_MONTH, INTERVAL_DAY_HOUR, INTERVAL_DAY_MINUTE, - INTERVAL_DAY_SECOND, INTERVAL_HOUR_MINUTE, INTERVAL_HOUR_SECOND, - INTERVAL_MINUTE_SECOND, INTERVAL_DAY_MICROSECOND, INTERVAL_HOUR_MICROSECOND, - INTERVAL_MINUTE_MICROSECOND, INTERVAL_SECOND_MICROSECOND -}; class Item_date_add_interval :public Item_date_func { @@ -741,9 +732,9 @@ public: bool get_date(TIME *ltime, uint fuzzy_date); const char *cast_type() const { return "date"; } enum_field_types field_type() const { return MYSQL_TYPE_DATE; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } void fix_length_and_dec() { @@ -763,9 +754,9 @@ public: bool get_time(TIME *ltime); const char *cast_type() const { return "time"; } enum_field_types field_type() const { return MYSQL_TYPE_TIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -778,9 +769,9 @@ public: String *val_str(String *str); const char *cast_type() const { return "datetime"; } enum_field_types field_type() const { return MYSQL_TYPE_DATETIME; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -796,9 +787,9 @@ public: decimals=0; max_length=MAX_DATE_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_date(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -816,18 +807,9 @@ public: enum_field_types field_type() const { return cached_field_type; } void fix_length_and_dec(); -/* - TODO: - Change this when we support - microseconds in TIME/DATETIME -*/ - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - if (cached_field_type == MYSQL_TYPE_TIME) - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); - else if (cached_field_type == MYSQL_TYPE_DATETIME) - return (new Field_datetime(maybe_null, name, t_arg, &my_charset_bin)); - return (new Field_string(max_length, maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } void print(String *str); const char *func_name() const { return "add_time"; } @@ -847,9 +829,9 @@ public: max_length=MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; maybe_null= 1; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -866,9 +848,9 @@ public: decimals=0; max_length=MAX_TIME_WIDTH*MY_CHARSET_BIN_MB_MAXLEN; } - Field *tmp_table_field(TABLE *t_arg) + Field *tmp_table_field(TABLE *table) { - return (new Field_time(maybe_null, name, t_arg, &my_charset_bin)); + return tmp_table_field_from_field_type(table, 0); } }; @@ -942,7 +924,10 @@ public: const char *func_name() const { return "str_to_date"; } enum_field_types field_type() const { return cached_field_type; } void fix_length_and_dec(); - Field 
*tmp_table_field(TABLE *t_arg); + Field *tmp_table_field(TABLE *table) + { + return tmp_table_field_from_field_type(table, 1); + } }; diff --git a/sql/item_uniq.cc b/sql/item_uniq.cc index 79b2ca68f4f..9db8228b345 100644 --- a/sql/item_uniq.cc +++ b/sql/item_uniq.cc @@ -25,5 +25,8 @@ Field *Item_sum_unique_users::create_tmp_field(bool group, TABLE *table, uint convert_blob_length) { - return new Field_long(9,maybe_null,name,table,1); + Field *field= new Field_long(9, maybe_null, name, 1); + if (field) + field->init(table); + return field; } diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc new file mode 100644 index 00000000000..26c2e84f8dd --- /dev/null +++ b/sql/item_xmlfunc.cc @@ -0,0 +1,2615 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "my_xml.h" + + +/* + TODO: future development directions: + 1. add real constants for XPATH_NODESET_CMP and XPATH_NODESET + into enum Type in item.h. + 2. add nodeset_to_nodeset_comparator + 3. add lacking functions: + - name() + - last() + - lang() + - string() + - id() + - translate() + - local-name() + - starts-with() + - namespace-uri() + - substring-after() + - normalize-space() + - substring-before() + 4. 
add lacking axis: + - following-sibling + - following, + - preceding-sibling + - preceding +*/ + + +/* Structure to store a parsed XML tree */ +typedef struct my_xml_node_st +{ + uint level; /* level in XML tree, 0 means root node */ + enum my_xml_node_type type; /* node type: node, or attribute, or text */ + uint parent; /* link to the parent */ + const char *beg; /* beginning of the name or text */ + const char *end; /* end of the name or text */ + const char *tagend; /* where this tag ends */ +} MY_XML_NODE; + + +/* Lexical analyzer token */ +typedef struct my_xpath_lex_st +{ + int term; /* token type, see MY_XPATH_LEX_XXXXX below */ + const char *beg; /* beginning of the token */ + const char *end; /* end of the token */ +} MY_XPATH_LEX; + + +/* Structure to store nodesets */ +typedef struct my_xpath_flt_st +{ + uint num; /* absolute position in MY_XML_NODE array */ + uint pos; /* relative position in context */ +} MY_XPATH_FLT; + + +/* XPath function creator */ +typedef struct my_xpath_function_names_st +{ + const char *name; /* function name */ + size_t length; /* function name length */ + size_t minargs; /* min number of arguments */ + size_t maxargs; /* max number of arguments */ + Item *(*create)(struct my_xpath_st *xpath, Item **args, uint nargs); +} MY_XPATH_FUNC; + + +/* XPath query parser */ +typedef struct my_xpath_st +{ + int debug; + MY_XPATH_LEX query; /* Whole query */ + MY_XPATH_LEX lasttok; /* last scanned token */ + MY_XPATH_LEX prevtok; /* previous scanned token */ + int axis; /* last scanned axis */ + int extra; /* last scanned "extra", context dependent */ + MY_XPATH_FUNC *func; /* last scanned function creator */ + Item *item; /* current expression */ + Item *context; /* last scanned context */ + Item *rootelement; /* The root element */ + String *context_cache; /* last context provider */ + String *pxml; /* Parsed XML, an array of MY_XML_NODE */ + CHARSET_INFO *cs; /* character set/collation string comparison */ +} MY_XPATH; + + +/* Dynamic array of MY_XPATH_FLT */ +class XPathFilter :public String +{ +public: + XPathFilter() :String() {} + inline bool append_element(MY_XPATH_FLT *flt) + { + String *str= this; + return str->append((const char*)flt, (uint32) sizeof(MY_XPATH_FLT)); + } + inline bool append_element(uint32 num, uint32 pos) + { + MY_XPATH_FLT add; + add.num= num; + add.pos= pos; + return append_element(&add); + } + inline MY_XPATH_FLT *element(uint i) + { + return (MY_XPATH_FLT*) (ptr() + i * sizeof(MY_XPATH_FLT)); + } + inline uint32 numelements() + { + return length() / sizeof(MY_XPATH_FLT); + } +}; + + +/* + Common features of the functions returning a node set.
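To make the structures above concrete: the whole XML document lives as a flat array of MY_XML_NODE inside a String, and a node set is just an array of MY_XPATH_FLT indices over it. The axis implementations further below all share one scanning idiom, isolated here as a standalone sketch (function name invented for illustration):

void collect_children_sketch(const MY_XML_NODE *nodes, uint numnodes,
                             uint parent, XPathFilter *out)
{
  uint pos= 0;
  for (uint j= parent + 1; j < numnodes; j++)
  {
    if (nodes[j].level <= nodes[parent].level)
      break;                             /* left the parent's subtree */
    if (nodes[j].parent == parent && nodes[j].type == MY_XML_NODE_TAG)
      out->append_element(j, pos++);     /* store (num, position-in-context) */
  }
}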
+*/ +class Item_nodeset_func :public Item_str_func +{ +protected: + String tmp_value, tmp2_value; + MY_XPATH_FLT *fltbeg, *fltend; + MY_XML_NODE *nodebeg, *nodeend; + uint numnodes; +public: + String *pxml; + String context_cache; + Item_nodeset_func(String *pxml_arg) :Item_str_func(), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, String *pxml_arg) + :Item_str_func(a), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, String *pxml_arg) + :Item_str_func(a, b), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, Item *c, String *pxml_arg) + :Item_str_func(a,b,c), pxml(pxml_arg) {} + void prepare_nodes() + { + nodebeg= (MY_XML_NODE*) pxml->ptr(); + nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length()); + numnodes= nodeend - nodebeg; + } + void prepare(String *nodeset) + { + prepare_nodes(); + String *res= args[0]->val_nodeset(&tmp_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + nodeset->length(0); + } + enum Type type() const { return XPATH_NODESET; } + String *val_str(String *str) + { + prepare_nodes(); + String *res= val_nodeset(&tmp2_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + String active; + active.alloc(numnodes); + bzero((char*) active.ptr(), numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *node; + uint j; + for (j=0, node= nodebeg ; j < numnodes; j++, node++) + { + if (node->type == MY_XML_NODE_TEXT && + node->parent == flt->num) + active[j]= 1; + } + } + + str->length(0); + str->set_charset(collation.collation); + for (uint i=0 ; i < numnodes; i++) + { + if(active[i]) + { + if (str->length()) + str->append(" ", 1, &my_charset_latin1); + str->append(nodebeg[i].beg, nodebeg[i].end - nodebeg[i].beg); + } + } + return str; + } + enum Item_result result_type () const { return STRING_RESULT; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } + const char *func_name() const { return "nodeset"; } +}; + + +/* Returns an XML root */ +class Item_nodeset_func_rootelement :public Item_nodeset_func +{ +public: + Item_nodeset_func_rootelement(String *pxml): Item_nodeset_func(pxml) {} + const char *func_name() const { return "xpath_rootelement"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns a Union of two node sets */ +class Item_nodeset_func_union :public Item_nodeset_func +{ +public: + Item_nodeset_func_union(Item *a, Item *b, String *pxml) + :Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_union"; } + String *val_nodeset(String *nodeset); +}; + + +/* Makes one step towards the given axis */ +class Item_nodeset_func_axisbyname :public Item_nodeset_func +{ + const char *node_name; + uint node_namelen; +public: + Item_nodeset_func_axisbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func(a, pxml), node_name(n_arg), node_namelen(l_arg) { } + const char *func_name() const { return "xpath_axisbyname"; } + bool validname(MY_XML_NODE *n) + { + if (node_name[0] == '*') + return 1; + return (node_namelen == (uint) (n->end - n->beg)) && + !memcmp(node_name, n->beg, node_namelen); + } +}; + + +/* Returns self */ +class Item_nodeset_func_selfbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_selfbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_selfbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns 
children */ +class Item_nodeset_func_childbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_childbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_childbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns descendants */ +class Item_nodeset_func_descendantbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_descendantbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_descendantbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns ancestors */ +class Item_nodeset_func_ancestorbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_ancestorbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_ancestorbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns parents */ +class Item_nodeset_func_parentbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_parentbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_parentbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns attributes */ +class Item_nodeset_func_attributebyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_attributebyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_attributebyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* + Condition iterator: goes through all nodes in the current + context and checks a condition, returning those nodes + giving TRUE condition result. +*/ +class Item_nodeset_func_predicate :public Item_nodeset_func +{ +public: + Item_nodeset_func_predicate(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_predicate"; } + String *val_nodeset(String *nodeset); +}; + + +/* Selects nodes with a given position in context */ +class Item_nodeset_func_elementbyindex :public Item_nodeset_func +{ +public: + Item_nodeset_func_elementbyindex(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) { } + const char *func_name() const { return "xpath_elementbyindex"; } + String *val_nodeset(String *nodeset); +}; + + +/* + We need to distinguish a number from a boolean: + a[1] and a[true] are different things in XPath. +*/ +class Item_bool :public Item_int +{ +public: + Item_bool(int32 i): Item_int(i) {} + const char *func_name() const { return "xpath_bool"; } + bool is_bool_func() { return 1; } +}; + + +/* + Converts its argument into a boolean value. 
+ * a number is true if it is non-zero + * a node-set is true if and only if it is non-empty + * a string is true if and only if its length is non-zero +*/ +class Item_xpath_cast_bool :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_xpath_cast_bool(Item *a, String *pxml_arg) + :Item_int_func(a), pxml(pxml_arg) {} + const char *func_name() const { return "xpath_cast_bool"; } + bool is_bool_func() { return 1; } + longlong val_int() + { + if (args[0]->type() == XPATH_NODESET) + { + String *flt= args[0]->val_nodeset(&tmp_value); + return flt->length() == sizeof(MY_XPATH_FLT) ? 1 : 0; + } + return args[0]->val_real() ? 1 : 0; + } +}; + + +/* + Converts its argument into a number +*/ +class Item_xpath_cast_number :public Item_real_func +{ +public: + Item_xpath_cast_number(Item *a): Item_real_func(a) {} + const char *func_name() const { return "xpath_cast_number"; } + virtual double val_real() { return args[0]->val_real(); } +}; + + +/* + Context cache, for predicate +*/ +class Item_nodeset_context_cache :public Item_nodeset_func +{ +public: + String *string_cache; + Item_nodeset_context_cache(String *str_arg, String *pxml): + Item_nodeset_func(pxml), string_cache(str_arg) { } + String *val_nodeset(String *res) + { return string_cache; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } +}; + + +class Item_func_xpath_position :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_position(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_position"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *flt= args[0]->val_nodeset(&tmp_value); + if (flt->length() == sizeof(MY_XPATH_FLT)) + return ((MY_XPATH_FLT*)flt->ptr())->pos + 1; + return 0; + } +}; + + +class Item_func_xpath_count :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_count(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_count"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *res= args[0]->val_nodeset(&tmp_value); + return res->length() / sizeof(MY_XPATH_FLT); + } +}; + + +class Item_func_xpath_sum :public Item_real_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_sum(Item *a, String *p) + :Item_real_func(a), pxml(p) {} + + const char *func_name() const { return "xpath_sum"; } + double val_real() + { + double sum= 0; + String *res= args[0]->val_nodeset(&tmp_value); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + char *end; + int err; + double add= my_strntod(collation.collation, (char*) node->beg, + node->end - node->beg, &end, &err); + if (!err) + sum+= add; + } + } + } + return sum; + } +}; + + +class Item_nodeset_to_const_comparator :public Item_bool_func +{ + String *pxml; + String tmp_nodeset; +public: + Item_nodeset_to_const_comparator(Item *nodeset, Item *cmpfunc, String *p) + :Item_bool_func(nodeset,cmpfunc), pxml(p) {} + enum Type type() const { 
return XPATH_NODESET_CMP; }; + const char *func_name() const { return "xpath_nodeset_to_const_comparator"; } + bool is_bool_func() { return 1; } + + longlong val_int() + { + Item_func *comp= (Item_func*)args[1]; + Item_string *fake= (Item_string*)(comp->arguments()[1]); + String *res= args[0]->val_nodeset(&tmp_nodeset); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + fake->str_value.set(node->beg, node->end - node->beg, + collation.collation); + if (args[1]->val_int()) + return 1; + } + } + } + return 0; + } +}; + + +String *Item_nodeset_func_rootelement::val_nodeset(String *nodeset) +{ + nodeset->length(0); + ((XPathFilter*)nodeset)->append_element(0, 0); + return nodeset; +} + + +String * Item_nodeset_func_union::val_nodeset(String *nodeset) +{ + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + String set0, *s0= args[0]->val_nodeset(&set0); + String set1, *s1= args[1]->val_nodeset(&set1); + String both_str; + both_str.alloc(numnodes); + char *both= (char*) both_str.ptr(); + bzero((void*)both, numnodes); + uint pos= 0; + MY_XPATH_FLT *flt; + + fltbeg= (MY_XPATH_FLT*) s0->ptr(); + fltend= (MY_XPATH_FLT*) (s0->ptr() + s0->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + fltbeg= (MY_XPATH_FLT*) s1->ptr(); + fltend= (MY_XPATH_FLT*) (s1->ptr() + s1->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + nodeset->length(0); + for (uint i= 0, pos= 0; i < numnodes; i++) + { + if (both[i]) + ((XPathFilter*)nodeset)->append_element(i, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_selfbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (validname(self)) + ((XPathFilter*)nodeset)->append_element(flt->num,pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_childbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos= 0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TAG) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_descendantbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + ((XPathFilter*)nodeset)->append_element(flt->num,pos++); + for (uint j= flt->num + 1 ; j < numnodes ; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->type == MY_XML_NODE_TAG) && validname(node)) + ((XPathFilter*)nodeset)->append_element(j,pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_ancestorbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + 
prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + uint pos= 0; + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + /* + Go to the root and add all nodes on the way. + Don't add the root if context is the root itself + */ + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + { + active[flt->num]= 1; + pos++; + } + + for (uint j= self->parent; nodebeg[j].parent != j; j= nodebeg[j].parent) + { + if (flt->num && validname(&nodebeg[j])) + { + active[j]= 1; + pos++; + } + } + } + + for (uint j= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, --pos); + } + return nodeset; +} + + +String *Item_nodeset_func_parentbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint j= nodebeg[flt->num].parent; + if (flt->num && validname(&nodebeg[j])) + active[j]= 1; + } + for (uint j= 0, pos= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_attributebyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos=0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_ATTR) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_predicate::val_nodeset(String *str) +{ + Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; + Item_func *comp_func= (Item_func*)args[1]; + uint pos= 0; + prepare(str); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + nodeset_func->context_cache.length(0); + ((XPathFilter*)(&nodeset_func->context_cache))->append_element(flt->num, + flt->pos); + if (comp_func->val_int()) + ((XPathFilter*)str)->append_element(flt->num, pos++); + } + return str; +} + + +String *Item_nodeset_func_elementbyindex::val_nodeset(String *nodeset) +{ + prepare(nodeset); + int index= args[1]->val_int() - 1; + if (index >= 0) + { + MY_XPATH_FLT *flt; + uint pos; + for (pos= 0, flt= fltbeg; flt < fltend; flt++) + { + if (flt->pos == (uint) index || args[1]->is_bool_func()) + ((XPathFilter*)nodeset)->append_element(flt->num, pos++); + } + } + return nodeset; +} + + +/* + If item is a node set, then casts it to boolean, + otherwise returns the item itself. +*/ +static Item* nodeset2bool(MY_XPATH *xpath, Item *item) +{ + if (item->type() == Item::XPATH_NODESET) + return new Item_xpath_cast_bool(item, xpath->pxml); + return item; +} + + +/* + XPath lexical tokens +*/ +#define MY_XPATH_LEX_DIGITS 'd' +#define MY_XPATH_LEX_IDENT 'i' +#define MY_XPATH_LEX_STRING 's' +#define MY_XPATH_LEX_SLASH '/' +#define MY_XPATH_LEX_LB '[' +#define MY_XPATH_LEX_RB ']' +#define MY_XPATH_LEX_LP '(' +#define MY_XPATH_LEX_RP ')' +#define MY_XPATH_LEX_EQ '=' +#define MY_XPATH_LEX_LESS '<' +#define MY_XPATH_LEX_GREATER '>' +#define MY_XPATH_LEX_AT '@' +#define MY_XPATH_LEX_COLON ':' +#define MY_XPATH_LEX_ASTERISK '*' +#define MY_XPATH_LEX_DOT '.'
+#define MY_XPATH_LEX_VLINE '|' +#define MY_XPATH_LEX_MINUS '-' +#define MY_XPATH_LEX_PLUS '+' +#define MY_XPATH_LEX_EXCL '!' +#define MY_XPATH_LEX_COMMA ',' +#define MY_XPATH_LEX_DOLLAR '$' +#define MY_XPATH_LEX_ERROR 'A' +#define MY_XPATH_LEX_EOF 'B' +#define MY_XPATH_LEX_AND 'C' +#define MY_XPATH_LEX_OR 'D' +#define MY_XPATH_LEX_DIV 'E' +#define MY_XPATH_LEX_MOD 'F' +#define MY_XPATH_LEX_FUNC 'G' +#define MY_XPATH_LEX_NODETYPE 'H' +#define MY_XPATH_LEX_AXIS 'I' +#define MY_XPATH_LEX_LE 'J' +#define MY_XPATH_LEX_GE 'K' + + +/* + XPath axis type +*/ +#define MY_XPATH_AXIS_ANCESTOR 0 +#define MY_XPATH_AXIS_ANCESTOR_OR_SELF 1 +#define MY_XPATH_AXIS_ATTRIBUTE 2 +#define MY_XPATH_AXIS_CHILD 3 +#define MY_XPATH_AXIS_DESCENDANT 4 +#define MY_XPATH_AXIS_DESCENDANT_OR_SELF 5 +#define MY_XPATH_AXIS_FOLLOWING 6 +#define MY_XPATH_AXIS_FOLLOWING_SIBLING 7 +#define MY_XPATH_AXIS_NAMESPACE 8 +#define MY_XPATH_AXIS_PARENT 9 +#define MY_XPATH_AXIS_PRECEDING 10 +#define MY_XPATH_AXIS_PRECEDING_SIBLING 11 +#define MY_XPATH_AXIS_SELF 12 + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *eq_func(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_ge(a, b); + case MY_XPATH_LEX_LE: return new Item_func_le(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_gt(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_lt(a, b); + } + return 0; +} + + +/* + Create scalar comparator + + SYNOPSYS + Create a comparator function for scalar arguments, + for the given arguments and reverse operation, e.g. + + A >= B is converted into A < B + + RETURN + The newly created item. +*/ +static Item *eq_func_reverse(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_lt(a, b); + case MY_XPATH_LEX_LE: return new Item_func_gt(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_le(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_ge(a, b); + } + return 0; +} + + +/* + Create a comparator + + SYNOPSYS + Create a comparator for scalar or non-scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. +*/ +static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) +{ + if (a->type() != Item::XPATH_NODESET && + b->type() != Item::XPATH_NODESET) + { + return eq_func(oper, a, b); // two scalar arguments + } + else if (a->type() == Item::XPATH_NODESET && + b->type() == Item::XPATH_NODESET) + { + return 0; // TODO: Comparison of two nodesets + } + else + { + /* + Compare a node set to a scalar value. + We just create a fake Item_string() argument, + which will be filled with the particular value + in a loop through all of the nodes in the node set. + */ + + Item *fake= new Item_string("", 0, xpath->cs); + Item_nodeset_func *nodeset; + Item *scalar, *comp; + if (a->type() == Item::XPATH_NODESET) + { + nodeset= (Item_nodeset_func*) a; + scalar= b; + comp= eq_func(oper, scalar, fake); + } + else + { + nodeset= (Item_nodeset_func*) b; + scalar= a; + comp= eq_func_reverse(oper, scalar, fake); + } + return new Item_nodeset_to_const_comparator(nodeset, comp, xpath->pxml); + } +} + + +/* + Create a step + + SYNOPSYS + Create a step function for the given argument and axis.
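+ + For example (illustrative): with the default child axis, the name test + "b" becomes an Item_nodeset_func_childbyname, while + "descendant-or-self::b" becomes an Item_nodeset_func_descendantbyname + with need_self set.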
+ + RETURN + The newly created item. +*/ +static Item* nametestfunc(MY_XPATH *xpath, + int type, Item *arg, const char *beg, uint len) +{ + DBUG_ASSERT(arg != 0); + DBUG_ASSERT(arg->type() == Item::XPATH_NODESET); + DBUG_ASSERT(beg != 0); + DBUG_ASSERT(len > 0); + + Item *res; + switch (type) + { + case MY_XPATH_AXIS_ANCESTOR: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_ANCESTOR_OR_SELF: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_PARENT: + res= new Item_nodeset_func_parentbyname(arg, beg, len, xpath->pxml); + break; + case MY_XPATH_AXIS_DESCENDANT: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_DESCENDANT_OR_SELF: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_ATTRIBUTE: + res= new Item_nodeset_func_attributebyname(arg, beg, len, xpath->pxml); + break; + case MY_XPATH_AXIS_SELF: + res= new Item_nodeset_func_selfbyname(arg, beg, len, xpath->pxml); + break; + default: + res= new Item_nodeset_func_childbyname(arg, beg, len, xpath->pxml); + } + return res; +} + + +/* + Tokens consisting of one character, for a faster lexical analyzer. +*/ +static char simpletok[128]= +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/* + ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ DEL +*/ + 0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 +}; + + +/* + XPath keywords +*/ +struct my_xpath_keyword_names_st +{ + int tok; + const char *name; + size_t length; + int extra; +}; + + +static struct my_xpath_keyword_names_st my_keyword_names[] = +{ + {MY_XPATH_LEX_AND , "and" , 3, 0 }, + {MY_XPATH_LEX_OR , "or" , 2, 0 }, + {MY_XPATH_LEX_DIV , "div" , 3, 0 }, + {MY_XPATH_LEX_MOD , "mod" , 3, 0 }, + + {MY_XPATH_LEX_NODETYPE, "comment" , 7, 0 }, + {MY_XPATH_LEX_NODETYPE, "text" , 4, 0 }, + {MY_XPATH_LEX_NODETYPE, "processing-instruction" , 22,0 }, + {MY_XPATH_LEX_NODETYPE, "node" , 4, 0 }, + + {MY_XPATH_LEX_AXIS,"ancestor" , 8,MY_XPATH_AXIS_ANCESTOR }, + {MY_XPATH_LEX_AXIS,"ancestor-or-self" ,16,MY_XPATH_AXIS_ANCESTOR_OR_SELF }, + {MY_XPATH_LEX_AXIS,"attribute" , 9,MY_XPATH_AXIS_ATTRIBUTE }, + {MY_XPATH_LEX_AXIS,"child" , 5,MY_XPATH_AXIS_CHILD }, + {MY_XPATH_LEX_AXIS,"descendant" ,10,MY_XPATH_AXIS_DESCENDANT }, + {MY_XPATH_LEX_AXIS,"descendant-or-self",18,MY_XPATH_AXIS_DESCENDANT_OR_SELF}, + {MY_XPATH_LEX_AXIS,"following" , 9,MY_XPATH_AXIS_FOLLOWING }, + {MY_XPATH_LEX_AXIS,"following-sibling" ,17,MY_XPATH_AXIS_FOLLOWING_SIBLING }, + {MY_XPATH_LEX_AXIS,"namespace" , 9,MY_XPATH_AXIS_NAMESPACE }, + {MY_XPATH_LEX_AXIS,"parent" , 6,MY_XPATH_AXIS_PARENT }, + {MY_XPATH_LEX_AXIS,"preceding" , 9,MY_XPATH_AXIS_PRECEDING }, + {MY_XPATH_LEX_AXIS,"preceding-sibling" ,17,MY_XPATH_AXIS_PRECEDING_SIBLING }, + {MY_XPATH_LEX_AXIS,"self" , 4,MY_XPATH_AXIS_SELF }, + + {0,NULL,0,0} +}; + + +/* + Lookup a keyword + + SYNOPSYS + Check that the last scanned identifier is a keyword. + + RETURN + - Token type, on lookup success. + - MY_XPATH_LEX_IDENT, on lookup failure.
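+ + For example (illustrative): "div" scans as MY_XPATH_LEX_DIV, while an + unknown identifier such as "foo" falls back to MY_XPATH_LEX_IDENT.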
+*/ +static int my_xpath_keyword(MY_XPATH *x, const char *beg, const char *end) +{ + struct my_xpath_keyword_names_st *k; + size_t length= end-beg; + for (k= my_keyword_names; k->name; k++) + { + if (length == k->length && !strncasecmp(beg, k->name, length)) + { + x->extra= k->extra; + return k->tok; + } + } + return MY_XPATH_LEX_IDENT; +} + + +/* + Functions to create an item, a la those in item_create.cc +*/ + +static Item *create_func_true(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(1); +} + + +static Item *create_func_false(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(0); +} + + +static Item *create_func_not(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_not(nodeset2bool(xpath, args[0])); +} + + +static Item *create_func_ceiling(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_ceiling(args[0]); +} + + +static Item *create_func_floor(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_floor(args[0]); +} + + +static Item *create_func_bool(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(args[0], xpath->pxml); +} + + +static Item *create_func_number(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_number(args[0]); +} + + +static Item *create_func_round(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_round(args[0], new Item_int((char*)"0",0,1),0); +} + + +static Item *create_func_last(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_count(xpath->context, xpath->pxml); +} + + +static Item *create_func_position(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_position(xpath->context, xpath->pxml); +} + + +static Item *create_func_contains(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(new Item_func_locate(args[0], args[1]), + xpath->pxml); +} + + +static Item *create_func_concat(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_concat(args[0], args[1]); +} + + +static Item *create_func_substr(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (nargs == 2) + return new Item_func_substr(args[0], args[1]); + else + return new Item_func_substr(args[0], args[1], args[2]); +} + + +static Item *create_func_count(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_count(args[0], xpath->pxml); +} + + +static Item *create_func_sum(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_sum(args[0], xpath->pxml); +} + + +/* + Function names. Separate lists for names with + lengths 3,4,5 and 6 for faster lookups.
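+ + For example (illustrative): looking up "not" (length 3) only scans + my_func_names3, so a lookup costs at most a handful of comparisons.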
+*/ +static MY_XPATH_FUNC my_func_names3[]= +{ + {"sum", 3, 1 , 1 , create_func_sum}, + {"not", 3, 1 , 1 , create_func_not}, + {0 , 0, 0 , 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names4[]= +{ + {"last", 4, 0, 0, create_func_last}, + {"true", 4, 0, 0, create_func_true}, + {"name", 4, 0, 1, 0}, + {"lang", 4, 1, 1, 0}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names5[]= +{ + {"count", 5, 1, 1, create_func_count}, + {"false", 5, 0, 0, create_func_false}, + {"floor", 5, 1, 1, create_func_floor}, + {"round", 5, 1, 1, create_func_round}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names6[]= +{ + {"concat", 6, 2, 255, create_func_concat}, + {"number", 6, 0, 1 , create_func_number}, + {"string", 6, 0, 1 , 0}, + {0 , 0, 0, 0 , 0} +}; + + +/* Other functions, with name longer than 6, all together */ +static MY_XPATH_FUNC my_func_names[] = +{ + {"id" , 2 , 1 , 1 , 0}, + {"boolean" , 7 , 1 , 1 , create_func_bool}, + {"ceiling" , 7 , 1 , 1 , create_func_ceiling}, + {"position" , 8 , 0 , 0 , create_func_position}, + {"contains" , 8 , 2 , 2 , create_func_contains}, + {"substring" , 9 , 2 , 3 , create_func_substr}, + {"translate" , 9 , 3 , 3 , 0}, + + {"local-name" , 10 , 0 , 1 , 0}, + {"starts-with" , 11 , 2 , 2 , 0}, + {"namespace-uri" , 13 , 0 , 1 , 0}, + {"substring-after" , 15 , 2 , 2 , 0}, + {"normalize-space" , 15 , 0 , 1 , 0}, + {"substring-before" , 16 , 2 , 2 , 0}, + + {NULL,0,0,0,0} +}; + + +/* + Lookup a function by name + + SYNOPSYS + Lookup a function by its name. + + RETURN + Pointer to a MY_XPATH_FUNC variable on success. + 0 - on failure. + +*/ +MY_XPATH_FUNC * +my_xpath_function(const char *beg, const char *end) +{ + MY_XPATH_FUNC *k, *function_names; + uint length= end-beg; + switch (length) + { + case 1: return 0; + case 3: function_names= my_func_names3; break; + case 4: function_names= my_func_names4; break; + case 5: function_names= my_func_names5; break; + case 6: function_names= my_func_names6; break; + default: function_names= my_func_names; + } + for (k= function_names; k->name; k++) + if (k->create && length == k->length && !strncasecmp(beg, k->name, length)) + return k; + return NULL; +} + + +/* Initialize a lexical analyzer token */ +static void +my_xpath_lex_init(MY_XPATH_LEX *lex, + const char *str, const char *strend) +{ + lex->beg= str; + lex->end= strend; +} + + +/* Initialize an XPath query parser */ +static void +my_xpath_init(MY_XPATH *xpath) +{ + bzero((void*)xpath, sizeof(xpath[0])); +} + + +/* + Some ctype-like helper functions. Note, we cannot + reuse cs->ident_map[], because in XPath, unlike in SQL, + the dash character is a valid identifier part. +*/ +static int +my_xident_beg(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) == '_')); +} + + +static int +my_xident_body(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) >= '0' && (c) <= '9') || + ((c)=='-') || ((c) == '_')); +} + + +static int +my_xdigit(int c) +{ + return ((c) >= '0' && (c) <= '9'); +} + + +/* + Scan the next token + + SYNOPSYS + Scan the next token from the input. + lex->term is set to the scanned token type. + lex->beg and lex->end are set to the beginning + and to the end of the token.
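+ + For example (illustrative), scanning "/a[1]" produces the token + sequence '/', 'i' (a), '[', 'd' (1), ']' and then MY_XPATH_LEX_EOF.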
+ RETURN + N/A +*/ +static void +my_xpath_lex_scan(MY_XPATH *xpath, + MY_XPATH_LEX *lex, const char *beg, const char *end) +{ + int ch; + for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces + lex->beg= beg; + + if (beg >= end) + { + lex->end= beg; + lex->term= MY_XPATH_LEX_EOF; // end of line reached + return; + } + ch= *beg++; + + if (ch > 0 && ch < 128 && simpletok[ch]) + { + // a token consisting of one character found + lex->end= beg; + lex->term= ch; + return; + } + + if (my_xident_beg(ch)) // ident, or a function call, or a keyword + { + // scan until the end of the identifier + for ( ; beg < end && my_xident_body(*beg); beg++); + lex->end= beg; + + // check if a function call; don't read past the end of the input + if (beg < end && *beg == '(' && (xpath->func= my_xpath_function(lex->beg, beg))) + { + lex->term= MY_XPATH_LEX_FUNC; + return; + } + + // check if a keyword + lex->term= my_xpath_keyword(xpath, lex->beg, beg); + return; + } + + if (my_xdigit(ch)) // a sequence of digits + { + for ( ; beg < end && my_xdigit(*beg) ; beg++); + lex->end= beg; + lex->term= MY_XPATH_LEX_DIGITS; + return; + } + + if (ch == '"' || ch == '\'') // a string: either '...' or "..." + { + for ( ; beg < end && *beg != ch ; beg++); + if (beg < end) + { + lex->end= beg+1; + lex->term= MY_XPATH_LEX_STRING; + return; + } + else + { + // unexpected end-of-line, without a closing quote + lex->end= end; + lex->term= MY_XPATH_LEX_ERROR; + return; + } + } + + lex->end= beg; + lex->term= MY_XPATH_LEX_ERROR; // unknown character + return; +} + + +/* + Scan the given token + + SYNOPSYS + Scan the given token and rotate lasttok to prevtok on success. + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_term(MY_XPATH *xpath, int term) +{ + if (xpath->lasttok.term == term) + { + xpath->prevtok= xpath->lasttok; + my_xpath_lex_scan(xpath, &xpath->lasttok, + xpath->lasttok.end, xpath->query.end); + return 1; + } + return 0; +} + + +/* + Scan AxisName + + SYNOPSYS + Scan an axis name and store the scanned axis type into xpath->axis. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName(MY_XPATH *xpath) +{ + int rc= my_xpath_parse_term(xpath, MY_XPATH_LEX_AXIS); + xpath->axis= xpath->extra; + return rc; +} + + +/********************************************* +** Grammar rules, according to http://www.w3.org/TR/xpath +** Implemented using the recursive descent method. +** All the following grammar processing functions accept +** a single "xpath" argument and return 1 on success and 0 on error. +** They also modify the "xpath" argument by creating new items.
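+** +** For example (illustrative), parsing "/a/b" descends roughly as +** Expr -> OrExpr -> ... -> UnaryExpr -> UnionExpr -> PathExpr -> +** LocationPath -> AbsoluteLocationPath, which then parses two +** childbyname steps.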
+*/ + +/* [9] PredicateExpr ::= Expr */ +#define my_xpath_parse_PredicateExpr(x) my_xpath_parse_Expr((x)) + +/* [14] Expr ::= OrExpr */ +#define my_xpath_parse_Expr(x) my_xpath_parse_OrExpr((x)) + +static int my_xpath_parse_LocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath); +static int my_xpath_parse_Step(MY_XPATH *xpath); +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NodeTest(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NameTest(MY_XPATH *xpath); +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath); +static int my_xpath_parse_Number(MY_XPATH *xpath); +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath); +static int my_xpath_parse_PathExpr(MY_XPATH *xpath); +static int my_xpath_parse_OrExpr(MY_XPATH *xpath); +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath); +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath); +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath); +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath); +static int my_xpath_parse_AndExpr(MY_XPATH *xpath); +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath); +static int my_xpath_parse_VariableReference(MY_XPATH *xpath); +static int my_xpath_parse_slash_opt_slash(MY_XPATH *xpath); + + +/* + Scan LocationPath + + SYNOPSYS + + [1] LocationPath ::= RelativeLocationPath + | AbsoluteLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_LocationPath(MY_XPATH *xpath) +{ + Item *context= xpath->context; + + if (!xpath->context) + xpath->context= xpath->rootelement; + int rc= my_xpath_parse_RelativeLocationPath(xpath) || + my_xpath_parse_AbsoluteLocationPath(xpath); + + xpath->item= xpath->context; + xpath->context= context; + return rc; +} + + +/* + Scan Absolute Location Path + + SYNOPSYS + + [2] AbsoluteLocationPath ::= '/' RelativeLocationPath? + | AbbreviatedAbsoluteLocationPath + [10] AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath + + We combine these two rules into one rule for better performance: + + [2,10] AbsoluteLocationPath ::= '/' RelativeLocationPath? 
+ | '//' RelativeLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 0; + + xpath->context= xpath->rootelement; + + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + return my_xpath_parse_RelativeLocationPath(xpath); + } + + /* RelativeLocationPath is optional after a single '/' */ + my_xpath_parse_RelativeLocationPath(xpath); + + return 1; +} + + +/* + Scan Relative Location Path + + SYNOPSYS + + For better performance we combine these two rules + + [3] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | AbbreviatedRelativeLocationPath + [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + + + Into this one: + + [3-11] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | RelativeLocationPath '//' Step + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_Step(xpath)) + return 0; + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + if (!my_xpath_parse_Step(xpath)) + return 0; + } + return 1; +} + + +/* + Scan non-abbreviated or abbreviated Step + + SYNOPSYS + + [4] Step ::= AxisSpecifier NodeTest Predicate* + | AbbreviatedStep + [8] Predicate ::= '[' PredicateExpr ']' + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AxisSpecifier(xpath)) + return 0; + + if (!my_xpath_parse_NodeTest(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_LB)) + { + Item *prev_context= xpath->context; + String *context_cache; + context_cache= &((Item_nodeset_func*)xpath->context)->context_cache; + xpath->context= new Item_nodeset_context_cache(context_cache, xpath->pxml); + xpath->context_cache= context_cache; + + if (!my_xpath_parse_PredicateExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) + return 0; + + xpath->item= nodeset2bool(xpath, xpath->item); + + if (xpath->item->is_bool_func()) + { + xpath->context= new Item_nodeset_func_predicate(prev_context, + xpath->item, + xpath->pxml); + } + else + { + xpath->context= new Item_nodeset_func_elementbyindex(prev_context, + xpath->item, + xpath->pxml); + } + } + return 1; +} + + +static int my_xpath_parse_Step(MY_XPATH *xpath) +{ + return + my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(xpath) || + my_xpath_parse_AbbreviatedStep(xpath); +} + + +/* + Scan Abbreviated Axis Specifier + + SYNOPSYS + [13] AbbreviatedAxisSpecifier ::= '@'? + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_AT)) + xpath->axis= MY_XPATH_AXIS_ATTRIBUTE; + else + xpath->axis= MY_XPATH_AXIS_CHILD; + return 1; +} + + +/* + Scan non-abbreviated axis specifier + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName_colon_colon(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON); +} + + +/* + Scan Axis Specifier + + SYNOPSYS + [5] AxisSpecifier ::= AxisName '::' + | AbbreviatedAxisSpecifier + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName_colon_colon(xpath) || + my_xpath_parse_AbbreviatedAxisSpecifier(xpath); +} + + +/* + Scan NodeType followed by parens + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest_lp_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_NODETYPE) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} + + +/* + Scan NodeTest + + SYNOPSYS + + [7] NodeTest ::= NameTest + | NodeType '(' ')' + | 'processing-instruction' '(' Literal ')' + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NameTest(xpath) || + my_xpath_parse_NodeTest_lp_rp(xpath); +} + + +/* + Scan Abbreviated Step + + SYNOPSYS + + [12] AbbreviatedStep ::= '.' | '..' + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + return 0; + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + xpath->context= new Item_nodeset_func_parentbyname(xpath->context, "*", 1, + xpath->pxml); + return 1; +} + + +/* + Scan Primary Expression + + SYNOPSYS + + [15] PrimaryExpr ::= VariableReference + | '(' Expr ')' + | Literal + | Number + | FunctionCall + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_lp_Expr_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} +static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_STRING)) + return 0; + xpath->item= new Item_string(xpath->prevtok.beg + 1, + xpath->prevtok.end - xpath->prevtok.beg - 2, + xpath->cs); + return 1; +} +static int my_xpath_parse_PrimaryExpr(MY_XPATH *xpath) +{ + return + my_xpath_parse_lp_Expr_rp(xpath) || + my_xpath_parse_VariableReference(xpath) || + my_xpath_parse_PrimaryExpr_literal(xpath) || + my_xpath_parse_Number(xpath) || + my_xpath_parse_FunctionCall(xpath); +} + + +/* + Scan Function Call + + SYNOPSYS + [16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')' + [17] Argument ::= Expr + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath) +{ + Item *args[256]; + uint nargs; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_FUNC)) + return 0; + + MY_XPATH_FUNC *func= xpath->func; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_LP)) + return 0; + + for (nargs= 0 ; nargs < func->maxargs; ) + { + if (!my_xpath_parse_Expr(xpath)) + return 0; + args[nargs++]= xpath->item; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_COMMA)) + { + if (nargs < func->minargs) + return 0; + else + break; + } + } + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RP)) + return 0; + + return ((xpath->item= func->create(xpath, args, nargs))) ?
1 : 0; +} + + +/* + Scan Union Expression + + SYNOPSYS + [18] UnionExpr ::= PathExpr + | UnionExpr '|' PathExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_PathExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_VLINE)) + { + Item *prev= xpath->item; + if (prev->type() != Item::XPATH_NODESET) + return 0; + + if (!my_xpath_parse_PathExpr(xpath) + || xpath->item->type() != Item::XPATH_NODESET) + return 0; + xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); + } + return 1; +} + + +/* + Scan Path Expression + + SYNOPSYS + + [19] PathExpr ::= LocationPath + | FilterExpr + | FilterExpr '/' RelativeLocationPath + | FilterExpr '//' RelativeLocationPath + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_FilterExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 1; + + my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH); + return my_xpath_parse_RelativeLocationPath(xpath); +} +static int my_xpath_parse_PathExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_LocationPath(xpath) || + my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); +} + + + +/* + Scan Filter Expression + + SYNOPSYS + [20] FilterExpr ::= PrimaryExpr + | FilterExpr Predicate + + or in other words: + + [20] FilterExpr ::= PrimaryExpr Predicate* + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_PrimaryExpr(xpath); +} + + +/* + Scan Or Expression + + SYNOPSYS + [21] OrExpr ::= AndExpr + | OrExpr 'or' AndExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_OrExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_OR)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), + nodeset2bool(xpath, xpath->item)); + } + return 1; +} + + +/* + Scan And Expression + + SYNOPSYS + [22] AndExpr ::= EqualityExpr + | AndExpr 'and' EqualityExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AndExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_AND)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), + nodeset2bool(xpath,xpath->item)); + } + return 1; +} + + +/* + Scan Equality Expression + + SYNOPSYS + [23] EqualityExpr ::= RelationalExpr + | EqualityExpr '=' RelationalExpr + | EqualityExpr '!=' RelationalExpr + or in other words: + + [23] EqualityExpr ::= RelationalExpr ( EqualityOperator EqualityExpr )* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_ne(MY_XPATH *xpath) +{ + MY_XPATH_LEX prevtok= xpath->prevtok; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_EXCL)) + return 0; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + /* Unget the exclamation mark */ + xpath->lasttok= xpath->prevtok; + xpath->prevtok= prevtok; + return 0; + } + return 1; +} +static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_ne(xpath)) + { + xpath->extra= '!'; + return 1; + } + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + xpath->extra= '='; + return 1; + } + return 0; +} +static int 
my_xpath_parse_EqualityExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + while (my_xpath_parse_EqualityOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Relational Expression + + SYNOPSYS + + [24] RelationalExpr ::= AdditiveExpr + | RelationalExpr '<' AdditiveExpr + | RelationalExpr '>' AdditiveExpr + | RelationalExpr '<=' AdditiveExpr + | RelationalExpr '>=' AdditiveExpr + or in other words: + + [24] RelationalExpr ::= AdditiveExpr (RelationalOperator RelationalExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_LESS)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_LE : MY_XPATH_LEX_LESS; + return 1; + } + else if (my_xpath_parse_term(xpath, MY_XPATH_LEX_GREATER)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_GE : MY_XPATH_LEX_GREATER; + return 1; + } + return 0; +} +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + while (my_xpath_parse_RelationalOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Additive Expression + + SYNOPSYS + + [25] AdditiveExpr ::= MultiplicativeExpr + | AdditiveExpr '+' MultiplicativeExpr + | AdditiveExpr '-' MultiplicativeExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AdditiveOperator(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_PLUS) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS); +} +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + while (my_xpath_parse_AdditiveOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + if (oper == MY_XPATH_LEX_PLUS) + xpath->item= new Item_func_plus(prev, xpath->item); + else + xpath->item= new Item_func_minus(prev, xpath->item); + }; + return 1; +} + + +/* + Scan Multiplicative Expression + + SYNOPSYS + + [26] MultiplicativeExpr ::= UnaryExpr + | MultiplicativeExpr MultiplyOperator UnaryExpr + | MultiplicativeExpr 'div' UnaryExpr + | MultiplicativeExpr 'mod' UnaryExpr + or in other words: + + [26] MultiplicativeExpr ::= UnaryExpr (MulOper MultiplicativeExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_MultiplicativeOperator(MY_XPATH *xpath) +{ + return + my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIV) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MOD); +} +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + + while (my_xpath_parse_MultiplicativeOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + switch (oper) + { + case MY_XPATH_LEX_ASTERISK: + xpath->item= new Item_func_mul(prev, xpath->item); + break; + case MY_XPATH_LEX_DIV: + xpath->item= new Item_func_int_div(prev, xpath->item); + break; + case MY_XPATH_LEX_MOD: + 
xpath->item= new Item_func_mod(prev, xpath->item); + break; + } + } + return 1; +} + + +/* + Scan Unary Expression + + SYNOPSYS + + [27] UnaryExpr ::= UnionExpr + | '-' UnaryExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS)) + return my_xpath_parse_UnionExpr(xpath); + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + xpath->item= new Item_func_neg(xpath->item); + return 1; +} + + +/* + Scan Number + + SYNOPSYS + + [30] Number ::= Digits ('.' Digits?)? | '.' Digits + + or in other words: + + [30] Number ::= Digits + | Digits '.' + | Digits '.' Digits + | '.' Digits + + Note: the last rule is not supported yet, + as it is in conflict with abbreviated step. + 1 + .123 does not work, + 1 + 0.123 does. + Perhaps it is better to move this code into the lexical analyzer. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_Number(MY_XPATH *xpath) +{ + const char *beg; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS)) + return 0; + beg= xpath->prevtok.beg; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + { + xpath->item= new Item_int(xpath->prevtok.beg, + xpath->prevtok.end - xpath->prevtok.beg); + return 1; + } + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS); + + xpath->item= new Item_float(beg, xpath->prevtok.end - beg); + return 1; +} + + +/* + Scan Variable reference + + SYNOPSYS + + [36] VariableReference ::= '$' QName + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_VariableReference(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_DOLLAR) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT); +} + + +/* + Scan Name Test + + SYNOPSYS + + [37] NameTest ::= '*' + | NCName ':' '*' + | QName + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_NodeTest_QName(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT)) + return 0; + DBUG_ASSERT(xpath->context); + uint len= xpath->prevtok.end - xpath->prevtok.beg; + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, + xpath->prevtok.beg, len); + return 1; +} +static int +my_xpath_parse_NodeTest_asterisk(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK)) + return 0; + DBUG_ASSERT(xpath->context); + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, "*", 1); + return 1; +} +static int +my_xpath_parse_NameTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NodeTest_asterisk(xpath) || + my_xpath_parse_NodeTest_QName(xpath); +} + + +/* + Scan an XPath expression + + SYNOPSYS + Scan an XPath expression. + The resulting item tree is returned in xpath->item.
+ + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse(MY_XPATH *xpath, const char *str, const char *strend) +{ + my_xpath_lex_init(&xpath->query, str, strend); + my_xpath_lex_init(&xpath->prevtok, str, strend); + my_xpath_lex_scan(xpath, &xpath->lasttok, str, strend); + + xpath->rootelement= new Item_nodeset_func_rootelement(xpath->pxml); + + return + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF); +} + + +void Item_xml_str_func::fix_length_and_dec() +{ + String *xp, tmp; + MY_XPATH xpath; + int rc; + + nodeset_func= 0; + + if (agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV)) + return; + + if (collation.collation->mbminlen > 1) + { + /* UCS2 is not supported */ + my_printf_error(ER_UNKNOWN_ERROR, + "Character set '%s' is not supported by XPATH", + MYF(0), collation.collation->csname); + return; + } + + if (!args[1]->const_item()) + { + my_printf_error(ER_UNKNOWN_ERROR, + "Only constant XPATH queries are supported", MYF(0)); + return; + } + + xp= args[1]->val_str(&tmp); + my_xpath_init(&xpath); + xpath.cs= collation.collation; + xpath.debug= 0; + xpath.pxml= &pxml; + + rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length()); + + if (!rc) + { + char context[32]; + uint clen= xpath.query.end - xpath.lasttok.beg; + /* Truncate the error context so it cannot overflow the buffer */ + set_if_smaller(clen, sizeof(context) - 1); + memcpy(context, xpath.lasttok.beg, clen); + context[clen]= '\0'; + my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%s'", + MYF(0), context); + return; + } + + nodeset_func= xpath.item; + if (nodeset_func) + nodeset_func->fix_fields(current_thd, &nodeset_func); + max_length= MAX_BLOB_WIDTH; +} + + +#define MAX_LEVEL 256 +typedef struct +{ + uint level; + String *pxml; // parsed XML + uint pos[MAX_LEVEL]; // Tag position stack +} MY_XML_USER_DATA; + + +/* + Find the parent node + + SYNOPSYS + Find the parent node, i.e. a tag or attribute node on the given level. + + RETURN + Index of the parent node, or 0 if none was found. +*/ +static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level) +{ + if (!nitems) + return 0; + + MY_XML_NODE *p, *last= &items[nitems-1]; + for (p= last; p >= items; p--) + { + if (p->level == level && + (p->type == MY_XML_NODE_TAG || + p->type == MY_XML_NODE_ATTR)) + { + return p - items; + } + } + return 0; +} + + +/* + Process tag beginning + + SYNOPSYS + + A call-back function executed when XML parser + is entering a tag or an attribute. + Appends the new node into data->pxml. + Increments data->level. + + RETURN + Currently only MY_XML_OK +*/ +static int xml_enter(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); + uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); + MY_XML_NODE node; + + data->pos[data->level]= numnodes; + node.level= data->level++; + node.type= st->current_node_type; // TAG or ATTR + node.beg= attr; + node.end= attr + len; + node.parent= parent; + data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); + return MY_XML_OK; +} + + +/* + Process text node + + SYNOPSYS + + A call-back function executed when XML parser + is entering a tag or an attribute textual value. + The value is appended into data->pxml.
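+ + For illustration (assuming the artificial root node added by parse_xml + below): "<a>t</a>" is parsed into three nodes: the root tag at level 0, + TAG "a" at level 1 with parent 0, and TEXT "t" at level 2 with parent 1.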
+ + RETURN + Currently only MY_XML_OK +*/ +static int xml_value(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); + uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); + MY_XML_NODE node; + + node.level= data->level; + node.type= MY_XML_NODE_TEXT; + node.beg= attr; + node.end= attr + len; + node.parent= parent; + data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); + return MY_XML_OK; +} + + +/* + Leave a tag or an attribute + + SYNOPSYS + + A call-back function executed when XML parser + is leaving a tag or an attribute. + Decrements data->level. + + RETURN + Currently only MY_XML_OK +*/ +static int xml_leave(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + DBUG_ASSERT(data->level > 0); + data->level--; + + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + nodes+= data->pos[data->level]; + nodes->tagend= st->cur; + + return MY_XML_OK; +} + + +/* + Parse raw XML + + SYNOPSYS + Parse raw_xml into a sequence of MY_XML_NODE entries in + parsed_xml_buf, adding an artificial root node first. + + RETURN + Pointer to the parsed XML on success + 0 on parse error +*/ +String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) +{ + MY_XML_PARSER p; + MY_XML_USER_DATA user_data; + int rc; + + parsed_xml_buf->length(0); + + /* Prepare XML parser */ + my_xml_parser_create(&p); + p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; + user_data.level= 0; + user_data.pxml= parsed_xml_buf; + my_xml_set_enter_handler(&p, xml_enter); + my_xml_set_value_handler(&p, xml_value); + my_xml_set_leave_handler(&p, xml_leave); + my_xml_set_user_data(&p, (void*) &user_data); + + /* Add root node */ + p.current_node_type= MY_XML_NODE_TAG; + xml_enter(&p, raw_xml->ptr(), 0); + + /* Execute XML parser */ + rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length()); + my_xml_parser_free(&p); + + return rc == MY_XML_OK ? parsed_xml_buf : 0; +} + + +String *Item_func_xml_extractvalue::val_str(String *str) +{ + String *res; + if (!nodeset_func || + !(res= args[0]->val_str(str)) || + !parse_xml(res, &pxml)) + { + null_value= 1; + return 0; + } + res= nodeset_func->val_str(&tmp_value); + return res; +} + + +String *Item_func_xml_update::val_str(String *str) +{ + String *res, *nodeset, *rep; + + if (!nodeset_func || + !(res= args[0]->val_str(str)) || + !(rep= args[2]->val_str(&tmp_value3)) || + !parse_xml(res, &pxml) || + !(nodeset= nodeset_func->val_nodeset(&tmp_value2))) + { + null_value= 1; + return 0; + } + + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml.ptr(); + MY_XML_NODE *nodeend= (MY_XML_NODE*) (pxml.ptr() + pxml.length()); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) nodeset->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (nodeset->ptr() + nodeset->length()); + + /* Allow replacing of one tag only */ + if (fltend - fltbeg != 1) + { + /* TODO: perhaps add a warning that more than one tag selected */ + return res; + } + + nodebeg+= fltbeg->num; + + tmp_value.length(0); + tmp_value.set_charset(collation.collation); + uint offs= nodebeg->type == MY_XML_NODE_TAG ?
1 : 0; + tmp_value.append(res->ptr(), nodebeg->beg - res->ptr() - offs); + tmp_value.append(rep->ptr(), rep->length()); + const char *end= nodebeg->tagend + offs; + tmp_value.append(end, res->ptr() + res->length() - end); + return &tmp_value; +} diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h new file mode 100644 index 00000000000..bc47e9c5bb1 --- /dev/null +++ b/sql/item_xmlfunc.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2000-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +/* This file defines all XML functions */ + + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + + +class Item_xml_str_func: public Item_str_func +{ +protected: + String tmp_value, pxml; + Item *nodeset_func; +public: + Item_xml_str_func(Item *a, Item *b): Item_str_func(a,b) {} + Item_xml_str_func(Item *a, Item *b, Item *c): Item_str_func(a,b,c) {} + void fix_length_and_dec(); + String *parse_xml(String *raw_xml, String *parsed_xml_buf); +}; + + +class Item_func_xml_extractvalue: public Item_xml_str_func +{ +public: + Item_func_xml_extractvalue(Item *a,Item *b) :Item_xml_str_func(a,b) {} + const char *func_name() const { return "extractvalue"; } + String *val_str(String *); +}; + + +class Item_func_xml_update: public Item_xml_str_func +{ + String tmp_value2, tmp_value3; +public: + Item_func_xml_update(Item *a,Item *b,Item *c) :Item_xml_str_func(a,b,c) {} + const char *func_name() const { return "updatexml"; } + String *val_str(String *); +}; + diff --git a/sql/key.cc b/sql/key.cc index 9d86095f33e..4001c6177a1 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -28,7 +28,7 @@ ** Used when calculating key for NEXT_NUMBER */ -int find_ref_key(TABLE *table,Field *field, uint *key_length) +int find_ref_key(KEY *key, uint key_count, Field *field, uint *key_length) { reg2 int i; reg3 KEY *key_info; @@ -38,8 +38,8 @@ int find_ref_key(TABLE *table,Field *field, uint *key_length) /* Test if some key starts as fieldpos */ - for (i= 0, key_info= table->key_info ; - i < (int) table->s->keys ; + for (i= 0, key_info= key ; + i < (int) key_count ; i++, key_info++) { if (key_info->key_part[0].offset == fieldpos) @@ -50,8 +50,8 @@ int find_ref_key(TABLE *table,Field *field, uint *key_length) } /* Test if some key contains fieldpos */ - for (i= 0, key_info= table->key_info ; - i < (int) table->s->keys ; + for (i= 0, key_info= key; + i < (int) key_count ; i++, key_info++) { uint j; @@ -429,3 +429,86 @@ int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length) } return 0; // Keys are equal } + + +/* + Compare two records in index order + SYNOPSIS + key_rec_cmp() + key Index information + rec0 Pointer to table->record[0] + first_rec Pointer to record compare with + second_rec Pointer to record compare against first_rec + DESCRIPTION + This method is set-up such that it can be called directly from the + priority queue and it is attempted to be 
optimised as much as possible + since this will be called O(N * log N) times while performing a merge + sort in various places in the code. + + We retrieve the pointer to table->record[0] using the fact that key_parts + have an offset making it possible to calculate the start of the record. + We need to get the diff to the compared record since none of the records + being compared are stored in table->record[0]. + + We first check for NULL values; if there are no NULL values, we use + a compare method that gets two field pointers and a max length + and returns the result of the comparison. +*/ + +int key_rec_cmp(void *key, byte *first_rec, byte *second_rec) +{ + KEY *key_info= (KEY*)key; + uint key_parts= key_info->key_parts, i= 0; + KEY_PART_INFO *key_part= key_info->key_part; + char *rec0= key_part->field->ptr - key_part->offset; + my_ptrdiff_t first_diff= first_rec - (byte*)rec0, sec_diff= second_rec - (byte*)rec0; + int result= 0; + DBUG_ENTER("key_rec_cmp"); + + do + { + Field *field= key_part->field; + uint length; + + if (key_part->null_bit) + { + /* The key_part can contain NULL values */ + bool first_is_null= field->is_null_in_record_with_offset(first_diff); + bool sec_is_null= field->is_null_in_record_with_offset(sec_diff); + /* + NULL is smaller than everything, so if the first is NULL and the + other is not, we know that we should return -1; for the opposite + case we should return +1. If both are NULL we call it equality, + although it is a strange form of equality: we have equally little + information about the real value. + */ + if (!first_is_null) + { + if (!sec_is_null) + ; /* Fall through, no NULL fields */ + else + { + DBUG_RETURN(+1); + } + } + else if (!sec_is_null) + { + DBUG_RETURN(-1); + } + else + goto next_loop; /* Both were NULL */ + } + /* + No null values in the fields. + We use the virtual method cmp_max with a max length parameter. + For most field types this translates into a cmp without + max length. The exceptions are the BLOB and VARCHAR field types + that take the max length into account.
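+ + A sketch of the intended use (illustrative; assuming the mysys QUEUE + API): init_queue(&queue, n_elements, 0, 0, key_rec_cmp, (void*) key_info) + would make the priority queue pass the KEY pointer to key_rec_cmp as + its "key" argument for every comparison.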
+ */ + result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff, + key_part->length); +next_loop: + key_part++; + } while (!result && ++i < key_parts); + DBUG_RETURN(result); +} diff --git a/sql/lex.h b/sql/lex.h index 1acfbaac211..574d7036c8a 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -59,6 +59,7 @@ static SYMBOL symbols[] = { { "<<", SYM(SHIFT_LEFT)}, { ">>", SYM(SHIFT_RIGHT)}, { "<=>", SYM(EQUAL_SYM)}, + { "ACCESSIBLE", SYM(ACCESSIBLE_SYM)}, { "ACTION", SYM(ACTION)}, { "ADD", SYM(ADD)}, { "AFTER", SYM(AFTER_SYM)}, @@ -74,7 +75,10 @@ static SYMBOL symbols[] = { { "ASC", SYM(ASC)}, { "ASCII", SYM(ASCII_SYM)}, { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, + { "AT", SYM(AT_SYM)}, + { "AUTHORS", SYM(AUTHORS_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, + { "AUTOEXTEND_SIZE", SYM(AUTOEXTEND_SIZE_SYM)}, { "AVG", SYM(AVG_SYM)}, { "AVG_ROW_LENGTH", SYM(AVG_ROW_LENGTH)}, { "BACKUP", SYM(BACKUP_SYM)}, @@ -110,6 +114,7 @@ static SYMBOL symbols[] = { { "CIPHER", SYM(CIPHER_SYM)}, { "CLIENT", SYM(CLIENT_SYM)}, { "CLOSE", SYM(CLOSE_SYM)}, + { "COALESCE", SYM(COALESCE)}, { "CODE", SYM(CODE_SYM)}, { "COLLATE", SYM(COLLATE_SYM)}, { "COLLATION", SYM(COLLATION_SYM)}, @@ -119,6 +124,7 @@ static SYMBOL symbols[] = { { "COMMIT", SYM(COMMIT_SYM)}, { "COMMITTED", SYM(COMMITTED_SYM)}, { "COMPACT", SYM(COMPACT_SYM)}, + { "COMPLETION", SYM(COMPLETION_SYM)}, { "COMPRESSED", SYM(COMPRESSED_SYM)}, { "CONCURRENT", SYM(CONCURRENT)}, { "CONDITION", SYM(CONDITION_SYM)}, @@ -139,6 +145,7 @@ static SYMBOL symbols[] = { { "DATA", SYM(DATA_SYM)}, { "DATABASE", SYM(DATABASE)}, { "DATABASES", SYM(DATABASES)}, + { "DATAFILE", SYM(DATAFILE_SYM)}, { "DATE", SYM(DATE_SYM)}, { "DATETIME", SYM(DATETIME)}, { "DAY", SYM(DAY_SYM)}, @@ -162,6 +169,7 @@ static SYMBOL symbols[] = { { "DIRECTORY", SYM(DIRECTORY_SYM)}, { "DISABLE", SYM(DISABLE_SYM)}, { "DISCARD", SYM(DISCARD)}, + { "DISK", SYM(DISK_SYM)}, { "DISTINCT", SYM(DISTINCT)}, { "DISTINCTROW", SYM(DISTINCT)}, /* Access likes this */ { "DIV", SYM(DIV_SYM)}, @@ -178,19 +186,23 @@ static SYMBOL symbols[] = { { "ENABLE", SYM(ENABLE_SYM)}, { "ENCLOSED", SYM(ENCLOSED)}, { "END", SYM(END)}, + { "ENDS", SYM(ENDS_SYM)}, { "ENGINE", SYM(ENGINE_SYM)}, { "ENGINES", SYM(ENGINES_SYM)}, { "ENUM", SYM(ENUM)}, { "ERRORS", SYM(ERRORS)}, { "ESCAPE", SYM(ESCAPE_SYM)}, { "ESCAPED", SYM(ESCAPED)}, + { "EVENT", SYM(EVENT_SYM)}, { "EVENTS", SYM(EVENTS_SYM)}, + { "EVERY", SYM(EVERY_SYM)}, { "EXECUTE", SYM(EXECUTE_SYM)}, { "EXISTS", SYM(EXISTS)}, { "EXIT", SYM(EXIT_SYM)}, { "EXPANSION", SYM(EXPANSION_SYM)}, { "EXPLAIN", SYM(DESCRIBE)}, { "EXTENDED", SYM(EXTENDED_SYM)}, + { "EXTENT_SIZE", SYM(EXTENT_SIZE_SYM)}, { "FALSE", SYM(FALSE_SYM)}, { "FAST", SYM(FAST_SYM)}, { "FETCH", SYM(FETCH_SYM)}, @@ -239,6 +251,7 @@ static SYMBOL symbols[] = { { "INDEX", SYM(INDEX_SYM)}, { "INDEXES", SYM(INDEXES)}, { "INFILE", SYM(INFILE)}, + { "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)}, { "INNER", SYM(INNER_SYM)}, { "INNOBASE", SYM(INNOBASE_SYM)}, { "INNODB", SYM(INNOBASE_SYM)}, @@ -246,6 +259,7 @@ static SYMBOL symbols[] = { { "INSENSITIVE", SYM(INSENSITIVE_SYM)}, { "INSERT", SYM(INSERT)}, { "INSERT_METHOD", SYM(INSERT_METHOD)}, + { "INSTALL", SYM(INSTALL_SYM)}, { "INT", SYM(INT_SYM)}, { "INT1", SYM(TINYINT)}, { "INT2", SYM(SMALLINT)}, @@ -275,17 +289,21 @@ static SYMBOL symbols[] = { { "LEAVE", SYM(LEAVE_SYM)}, { "LEAVES", SYM(LEAVES)}, { "LEFT", SYM(LEFT)}, + { "LESS", SYM(LESS_SYM)}, { "LEVEL", SYM(LEVEL_SYM)}, { "LIKE", SYM(LIKE)}, { "LIMIT", SYM(LIMIT)}, + { "LINEAR", SYM(LINEAR_SYM)}, { "LINES", SYM(LINES)}, { "LINESTRING", 
SYM(LINESTRING)}, + { "LIST", SYM(LIST_SYM)}, { "LOAD", SYM(LOAD)}, { "LOCAL", SYM(LOCAL_SYM)}, { "LOCALTIME", SYM(NOW_SYM)}, { "LOCALTIMESTAMP", SYM(NOW_SYM)}, { "LOCK", SYM(LOCK_SYM)}, { "LOCKS", SYM(LOCKS_SYM)}, + { "LOGFILE", SYM(LOGFILE_SYM)}, { "LOGS", SYM(LOGS_SYM)}, { "LONG", SYM(LONG_SYM)}, { "LONGBLOB", SYM(LONGBLOB)}, @@ -311,12 +329,15 @@ static SYMBOL symbols[] = { { "MAX_CONNECTIONS_PER_HOUR", SYM(MAX_CONNECTIONS_PER_HOUR)}, { "MAX_QUERIES_PER_HOUR", SYM(MAX_QUERIES_PER_HOUR)}, { "MAX_ROWS", SYM(MAX_ROWS)}, + { "MAX_SIZE", SYM(MAX_SIZE_SYM)}, { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, + { "MAXVALUE", SYM(MAX_VALUE_SYM)}, { "MEDIUM", SYM(MEDIUM_SYM)}, { "MEDIUMBLOB", SYM(MEDIUMBLOB)}, { "MEDIUMINT", SYM(MEDIUMINT)}, { "MEDIUMTEXT", SYM(MEDIUMTEXT)}, + { "MEMORY", SYM(MEMORY_SYM)}, { "MERGE", SYM(MERGE_SYM)}, { "MICROSECOND", SYM(MICROSECOND_SYM)}, { "MIDDLEINT", SYM(MEDIUMINT)}, /* For powerbuilder */ @@ -344,6 +365,8 @@ static SYMBOL symbols[] = { { "NEW", SYM(NEW_SYM)}, { "NEXT", SYM(NEXT_SYM)}, { "NO", SYM(NO_SYM)}, + { "NO_WAIT", SYM(NO_WAIT_SYM)}, + { "NODEGROUP", SYM(NODEGROUP_SYM)}, { "NONE", SYM(NONE_SYM)}, { "NOT", SYM(NOT_SYM)}, { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, @@ -365,13 +388,18 @@ static SYMBOL symbols[] = { { "OUTER", SYM(OUTER)}, { "OUTFILE", SYM(OUTFILE)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, + { "PARSER", SYM(PARSER_SYM)}, { "PARTIAL", SYM(PARTIAL)}, + { "PARTITION", SYM(PARTITION_SYM)}, + { "PARTITIONS", SYM(PARTITIONS_SYM)}, { "PASSWORD", SYM(PASSWORD)}, { "PHASE", SYM(PHASE_SYM)}, + { "PLUGIN", SYM(PLUGIN_SYM)}, { "POINT", SYM(POINT_SYM)}, { "POLYGON", SYM(POLYGON)}, { "PRECISION", SYM(PRECISION)}, { "PREPARE", SYM(PREPARE_SYM)}, + { "PRESERVE", SYM(PRESERVE_SYM)}, { "PREV", SYM(PREV_SYM)}, { "PRIMARY", SYM(PRIMARY_SYM)}, { "PRIVILEGES", SYM(PRIVILEGES)}, @@ -382,14 +410,16 @@ static SYMBOL symbols[] = { { "QUARTER", SYM(QUARTER_SYM)}, { "QUERY", SYM(QUERY_SYM)}, { "QUICK", SYM(QUICK)}, - { "RAID0", SYM(RAID_0_SYM)}, - { "RAID_CHUNKS", SYM(RAID_CHUNKS)}, - { "RAID_CHUNKSIZE", SYM(RAID_CHUNKSIZE)}, - { "RAID_TYPE", SYM(RAID_TYPE)}, + { "RANGE", SYM(RANGE_SYM)}, { "READ", SYM(READ_SYM)}, + { "READ_ONLY", SYM(READ_ONLY_SYM)}, + { "READ_WRITE", SYM(READ_WRITE_SYM)}, { "READS", SYM(READS_SYM)}, { "REAL", SYM(REAL)}, + { "REBUILD", SYM(REBUILD_SYM)}, { "RECOVER", SYM(RECOVER_SYM)}, + { "REDO_BUFFER_SIZE", SYM(REDO_BUFFER_SIZE_SYM)}, + { "REDOFILE", SYM(REDOFILE_SYM)}, { "REDUNDANT", SYM(REDUNDANT_SYM)}, { "REFERENCES", SYM(REFERENCES)}, { "REGEXP", SYM(REGEXP)}, @@ -399,6 +429,7 @@ static SYMBOL symbols[] = { { "RELEASE", SYM(RELEASE_SYM)}, { "RELOAD", SYM(RELOAD)}, { "RENAME", SYM(RENAME)}, + { "REORGANIZE", SYM(REORGANIZE_SYM)}, { "REPAIR", SYM(REPAIR)}, { "REPEATABLE", SYM(REPEATABLE_SYM)}, { "REPLACE", SYM(REPLACE)}, @@ -422,6 +453,7 @@ static SYMBOL symbols[] = { { "ROW_FORMAT", SYM(ROW_FORMAT_SYM)}, { "RTREE", SYM(RTREE_SYM)}, { "SAVEPOINT", SYM(SAVEPOINT_SYM)}, + { "SCHEDULE", SYM(SCHEDULE_SYM)}, { "SCHEMA", SYM(DATABASE)}, { "SCHEMAS", SYM(DATABASES)}, { "SECOND", SYM(SECOND_SYM)}, @@ -443,7 +475,7 @@ static SYMBOL symbols[] = { { "SNAPSHOT", SYM(SNAPSHOT_SYM)}, { "SMALLINT", SYM(SMALLINT)}, { "SOME", SYM(ANY_SYM)}, - { "SONAME", SYM(UDF_SONAME_SYM)}, + { "SONAME", SYM(SONAME_SYM)}, { "SOUNDS", SYM(SOUNDS_SYM)}, { "SPATIAL", SYM(SPATIAL_SYM)}, { "SPECIFIC", SYM(SPECIFIC_SYM)}, @@ -470,13 +502,15 @@ static SYMBOL symbols[] = { { "SSL", SYM(SSL_SYM)}, { "START", 
SYM(START_SYM)}, { "STARTING", SYM(STARTING)}, + { "STARTS", SYM(STARTS_SYM)}, { "STATUS", SYM(STATUS_SYM)}, { "STOP", SYM(STOP_SYM)}, { "STORAGE", SYM(STORAGE_SYM)}, { "STRAIGHT_JOIN", SYM(STRAIGHT_JOIN)}, { "STRING", SYM(STRING_SYM)}, - { "STRIPED", SYM(RAID_STRIPED_SYM)}, { "SUBJECT", SYM(SUBJECT_SYM)}, + { "SUBPARTITION", SYM(SUBPARTITION_SYM)}, + { "SUBPARTITIONS", SYM(SUBPARTITIONS_SYM)}, { "SUPER", SYM(SUPER_SYM)}, { "SUSPEND", SYM(SUSPEND_SYM)}, { "TABLE", SYM(TABLE_SYM)}, @@ -486,6 +520,7 @@ static SYMBOL symbols[] = { { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, { "TERMINATED", SYM(TERMINATED)}, { "TEXT", SYM(TEXT_SYM)}, + { "THAN", SYM(THAN_SYM)}, { "THEN", SYM(THEN_SYM)}, { "TIME", SYM(TIME_SYM)}, { "TIMESTAMP", SYM(TIMESTAMP)}, @@ -505,12 +540,15 @@ static SYMBOL symbols[] = { { "TYPES", SYM(TYPES_SYM)}, { "UNCOMMITTED", SYM(UNCOMMITTED_SYM)}, { "UNDEFINED", SYM(UNDEFINED_SYM)}, + { "UNDO_BUFFER_SIZE", SYM(UNDO_BUFFER_SIZE_SYM)}, + { "UNDOFILE", SYM(UNDOFILE_SYM)}, { "UNDO", SYM(UNDO_SYM)}, { "UNICODE", SYM(UNICODE_SYM)}, { "UNION", SYM(UNION_SYM)}, { "UNIQUE", SYM(UNIQUE_SYM)}, { "UNKNOWN", SYM(UNKNOWN_SYM)}, { "UNLOCK", SYM(UNLOCK_SYM)}, + { "UNINSTALL", SYM(UNINSTALL_SYM)}, { "UNSIGNED", SYM(UNSIGNED)}, { "UNTIL", SYM(UNTIL_SYM)}, { "UPDATE", SYM(UPDATE_SYM)}, @@ -531,6 +569,7 @@ static SYMBOL symbols[] = { { "VARCHARACTER", SYM(VARCHAR)}, { "VARIABLES", SYM(VARIABLES)}, { "VARYING", SYM(VARYING)}, + { "WAIT", SYM(WAIT_SYM)}, { "WARNINGS", SYM(WARNINGS)}, { "WEEK", SYM(WEEK_SYM)}, { "WHEN", SYM(WHEN_SYM)}, @@ -578,7 +617,6 @@ static SYMBOL sql_functions[] = { { "CENTROID", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_centroid)}, { "CHAR_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, { "CHARACTER_LENGTH", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_char_length)}, - { "COALESCE", SYM(COALESCE)}, { "COERCIBILITY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_coercibility)}, { "COMPRESS", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_compress)}, { "CONCAT", SYM(CONCAT)}, @@ -615,6 +653,7 @@ static SYMBOL sql_functions[] = { { "EQUALS", F_SYM(FUNC_ARG2),0,CREATE_FUNC_GEOM(create_func_equals)}, { "EXTERIORRING", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_exteriorring)}, { "EXTRACT", SYM(EXTRACT_SYM)}, + { "EXTRACTVALUE", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_xml_extractvalue)}, { "EXP", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_exp)}, { "EXPORT_SET", SYM(EXPORT_SET)}, { "FIELD", SYM(FIELD_FUNC)}, /* For compability */ @@ -769,6 +808,7 @@ static SYMBOL sql_functions[] = { { "UNHEX", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_unhex)}, { "UNIQUE_USERS", SYM(UNIQUE_USERS)}, { "UNIX_TIMESTAMP", SYM(UNIX_TIMESTAMP)}, + { "UPDATEXML", F_SYM(FUNC_ARG3),0,CREATE_FUNC(create_func_xml_update)}, { "UPPER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)}, { "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)}, { "VARIANCE", SYM(VARIANCE_SYM)}, diff --git a/sql/lock.cc b/sql/lock.cc index d0bfcfd7272..9cd0dcce610 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -72,7 +72,7 @@ TODO: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table,uint count, @@ -146,6 +146,7 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, } thd->proc_info="System lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); if (lock_external(thd, tables, count)) { my_free((gptr) sql_lock,MYF(0)); @@ -153,6 +154,7 @@ 
MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, break; } thd->proc_info="Table lock"; + DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info)); thd->locked=1; rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks, sql_lock->lock_count, @@ -218,6 +220,7 @@ static int lock_external(THD *thd, TABLE **tables, uint count) int lock_type,error; DBUG_ENTER("lock_external"); + DBUG_PRINT("info", ("count %d", count)); for (i=1 ; i <= count ; i++, tables++) { DBUG_ASSERT((*tables)->reginfo.lock_type >= TL_READ); @@ -226,7 +229,6 @@ static int lock_external(THD *thd, TABLE **tables, uint count) ((*tables)->reginfo.lock_type >= TL_READ && (*tables)->reginfo.lock_type <= TL_READ_NO_INSERT)) lock_type=F_RDLCK; - if ((error=(*tables)->file->external_lock(thd,lock_type))) { print_lock_error(error, (*tables)->file->table_type()); @@ -349,18 +351,37 @@ void mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table) } } +/* Downgrade all locks on a table from WRITE_ONLY to a new WRITE level */ + +void mysql_lock_downgrade_write(THD *thd, TABLE *table, + thr_lock_type new_lock_type) +{ + MYSQL_LOCK *locked; + TABLE *write_lock_used; + if ((locked = get_lock_data(thd,&table,1,1,&write_lock_used))) + { + for (uint i=0; i < locked->lock_count; i++) + thr_downgrade_write_lock(locked->locks[i], new_lock_type); + my_free((gptr) locked,MYF(0)); + } +} + + /* abort all other threads waiting to get lock in table */ -void mysql_lock_abort(THD *thd, TABLE *table) +void mysql_lock_abort(THD *thd, TABLE *table, bool upgrade_lock) { MYSQL_LOCK *locked; TABLE *write_lock_used; + DBUG_ENTER("mysql_lock_abort"); + if ((locked = get_lock_data(thd,&table,1,1,&write_lock_used))) { for (uint i=0; i < locked->lock_count; i++) - thr_abort_locks(locked->locks[i]->lock); + thr_abort_locks(locked->locks[i]->lock, upgrade_lock); my_free((gptr) locked,MYF(0)); } + DBUG_VOID_RETURN; } @@ -583,6 +604,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, TABLE **to; DBUG_ENTER("get_lock_data"); + DBUG_PRINT("info", ("count %d", count)); *write_lock_used=0; for (i=tables=lock_count=0 ; i < count ; i++) { @@ -592,18 +614,15 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, lock_count++; } /* - To be able to open and lock for reading system tables like 'mysql.proc', - when we already have some tables opened and locked, and avoid deadlocks - we have to disallow write-locking of these tables with any other tables. + Check if we can lock the table. For some tables we cannot do that + because of handler-specific locking issues.
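(The per-handler hook consulted just below replaces the removed hard-coded system-table check. As a rough illustration only, an engine hosting the log tables might implement it along the following lines; the class name, the is_log_table() helper and the error code are hypothetical, and the parameter types are inferred from the call site in this hunk.)

  bool ha_example::check_if_locking_is_allowed(uint sql_command, ulong type,
                                               TABLE *table, uint count,
                                               bool called_by_logger_thread)
  {
    /*
      A log table may only be write-locked together with other tables by
      the internal logger thread; everyone else must lock it on its own.
    */
    if (is_log_table(table) && count > 1 && !called_by_logger_thread)
    {
      my_error(ER_CANT_LOCK_LOG_TABLE, MYF(0)); /* hypothetical error code */
      return FALSE;                             /* veto the lock request */
    }
    return TRUE;                                /* locking is allowed */
  }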
*/ - if (table_ptr[i]->s->system_table && - table_ptr[i]->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && - count != 1) - { - my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table_ptr[i]->s->db, - table_ptr[i]->s->table_name); - return 0; - } + if (!table_ptr[i]-> file-> + check_if_locking_is_allowed(thd->lex->sql_command, thd->lex->type, + table_ptr[i], count, + (thd == logger.get_general_log_thd()) || + (thd == logger.get_slow_log_thd()))) + DBUG_RETURN(0); } if (!(sql_lock= (MYSQL_LOCK*) @@ -615,6 +634,8 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, to=sql_lock->table=(TABLE**) (locks+tables); sql_lock->table_count=lock_count; sql_lock->lock_count=tables; + DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d", + sql_lock->table_count, sql_lock->lock_count)); for (i=0 ; i < count ; i++) { @@ -727,32 +748,35 @@ int lock_table_name(THD *thd, TABLE_LIST *table_list) DBUG_ENTER("lock_table_name"); DBUG_PRINT("enter",("db: %s name: %s", db, table_list->table_name)); - safe_mutex_assert_owner(&LOCK_open); - - key_length=(uint) (strmov(strmov(key,db)+1,table_list->table_name) - -key)+ 1; - + key_length= create_table_def_key(thd, key, table_list, 0); /* Only insert the table if we haven't insert it already */ for (table=(TABLE*) hash_first(&open_cache, (byte*)key, key_length, &state); table ; - table = (TABLE*) hash_next(&open_cache, (byte*)key, key_length, &state)) + table = (TABLE*) hash_next(&open_cache,(byte*) key,key_length, &state)) + { if (table->in_use == thd) + { + DBUG_PRINT("info", ("Table is in use")); + table->s->version= 0; // Ensure no one can use this + table->locked_by_name= 1; DBUG_RETURN(0); - + } + } /* Create a table entry with the right key and with an old refresh version Note that we must use my_malloc() here as this is freed by the table cache */ - if (!(table= (TABLE*) my_malloc(sizeof(*table)+key_length, - MYF(MY_WME | MY_ZEROFILL)))) + if (!(table= (TABLE*) my_malloc(sizeof(*table)+ sizeof(TABLE_SHARE)+ + key_length, MYF(MY_WME | MY_ZEROFILL)))) DBUG_RETURN(-1); - table->s= &table->share_not_to_be_used; - memcpy((table->s->table_cache_key= (char*) (table+1)), key, key_length); - table->s->db= table->s->table_cache_key; - table->s->key_length=key_length; - table->in_use=thd; + table->s= (TABLE_SHARE*) (table+1); + memcpy((table->s->table_cache_key.str= (char*) (table->s+1)), key, + key_length); + table->s->table_cache_key.length= key_length; + table->s->tmp_table= INTERNAL_TMP_TABLE; // for intern_close_table + table->in_use= thd; table->locked_by_name=1; table_list->table=table; @@ -782,8 +806,17 @@ static bool locked_named_table(THD *thd, TABLE_LIST *table_list) { for (; table_list ; table_list=table_list->next_local) { - if (table_list->table && table_is_used(table_list->table,0)) - return 1; + TABLE *table= table_list->table; + if (table) + { + TABLE *save_next= table->next; + bool result; + table->next= 0; + result= table_is_used(table_list->table, 0); + table->next= save_next; + if (result) + return 1; + } } return 0; // All tables are locked } @@ -793,6 +826,7 @@ bool wait_for_locked_table_names(THD *thd, TABLE_LIST *table_list) { bool result=0; DBUG_ENTER("wait_for_locked_table_names"); + safe_mutex_assert_owner(&LOCK_open); while (locked_named_table(thd,table_list)) @@ -802,7 +836,7 @@ bool wait_for_locked_table_names(THD *thd, TABLE_LIST *table_list) result=1; break; } - wait_for_refresh(thd); + wait_for_condition(thd, &LOCK_open, &COND_refresh); pthread_mutex_lock(&LOCK_open); } DBUG_RETURN(result); @@ -1154,5 
+1188,3 @@ bool make_global_read_lock_block_commit(THD *thd) thd->exit_cond(old_message); // this unlocks LOCK_global_read_lock DBUG_RETURN(error); } - - diff --git a/sql/log.cc b/sql/log.cc index 85e8c4dae2f..ff14b986aa4 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -24,6 +24,7 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include <my_dir.h> #include <stdarg.h> @@ -33,7 +34,17 @@ #include "message.h" #endif -MYSQL_LOG mysql_log, mysql_slow_log, mysql_bin_log; +/* max size of the log message */ +#define MAX_LOG_BUFFER_SIZE 1024 +#define MAX_USER_HOST_SIZE 512 +#define MAX_TIME_SIZE 32 + +/* we need this for log files initialization */ +extern char *opt_logname, *opt_slow_logname; + +LOGGER logger; + +MYSQL_LOG mysql_bin_log; ulong sync_binlog_counter= 0; static bool test_if_number(const char *str, @@ -46,11 +57,25 @@ static int binlog_commit(THD *thd, bool all); static int binlog_rollback(THD *thd, bool all); static int binlog_prepare(THD *thd, bool all); +/* + This is a POD. Please keep it that way! + + Don't add constructors, destructors, or virtual functions. +*/ +struct binlog_trx_data { + bool empty() const { + return pending == NULL && my_b_tell(&trans_log) == 0; + } + IO_CACHE trans_log; // The transaction cache + Rows_log_event *pending; // The pending binrows event +}; + handlerton binlog_hton = { + MYSQL_HANDLERTON_INTERFACE_VERSION, "binlog", SHOW_OPTION_YES, "This is a meta storage engine to represent the binlog in a transaction", - DB_TYPE_UNKNOWN, /* IGNORE for now */ + DB_TYPE_BINLOG, /* IGNORE for now */ binlog_init, 0, sizeof(my_off_t), /* savepoint size = binlog offset */ @@ -67,9 +92,925 @@ handlerton binlog_hton = { NULL, /* create_cursor_read_view */ NULL, /* set_cursor_read_view */ NULL, /* close_cursor_read_view */ - HTON_HIDDEN + NULL, /* Create a new handler */ + NULL, /* Drop a database */ + NULL, /* Panic call */ + NULL, /* Start Consistent Snapshot */ + NULL, /* Flush logs */ + NULL, /* Show status */ + NULL, /* Partition flags */ + NULL, /* Alter table flags */ + NULL, /* Alter Tablespace */ + NULL, /* Fill FILES table */ + HTON_NOT_USER_SELECTABLE | HTON_HIDDEN, + NULL, /* binlog_func */ + NULL /* binlog_log_query */ }; + + +/* + Open log table of a given type (general or slow log) + + SYNOPSIS + open_log_table() + + log_type type of the log table to open: QUERY_LOG_GENERAL + or QUERY_LOG_SLOW + + DESCRIPTION + + The function opens a log table and marks it as such. Log tables stay open + the whole time the server is running, except for the moments when they + have to be reopened: during FLUSH LOGS and TRUNCATE. This + function is invoked directly only once during startup. All subsequent + calls happen through reopen_log_table(), which performs an additional check.
+ + RETURN + FALSE - OK + TRUE - error occurred */ + +bool Log_to_csv_event_handler::open_log_table(uint log_type) +{ + THD *log_thd, *curr= current_thd; + TABLE_LIST *table; + bool error= FALSE; + DBUG_ENTER("open_log_table"); + + switch (log_type) { + case QUERY_LOG_GENERAL: + log_thd= general_log_thd; + table= &general_log; + /* clean up table before reuse/initial usage */ + bzero((char*) table, sizeof(TABLE_LIST)); + table->alias= table->table_name= (char*) "general_log"; + table->table_name_length= 11; + break; + case QUERY_LOG_SLOW: + log_thd= slow_log_thd; + table= &slow_log; + bzero((char*) table, sizeof(TABLE_LIST)); + table->alias= table->table_name= (char*) "slow_log"; + table->table_name_length= 8; + break; + default: + DBUG_ASSERT(0); + } + + /* + This way we check that the appropriate log thd was created ok during + initialization. We cannot check the "is_log_tables_initialized" var, as + the very initialization is not finished until this function has + completed for the very first time. + */ + if (!log_thd) + { + DBUG_PRINT("error",("Cannot initialize log tables")); + DBUG_RETURN(TRUE); + } + + /* + Set THD's thread_stack. This is needed to perform the stack overrun + check, which is done by some routines (e.g. open_table()). + In the case we are called by a thread which already has this parameter + set, we use this value. Otherwise we do a wild guess. This won't help + to correctly track the stack overrun in these exceptional cases (which + could probably happen only during startup and shutdown) but at least + lets us pass asserts. + The problem stems from the fact that logger THDs are not real threads. + */ + if (curr) + log_thd->thread_stack= curr->thread_stack; + else + log_thd->thread_stack= (char*) &log_thd; + + log_thd->store_globals(); + + table->lock_type= TL_WRITE_CONCURRENT_INSERT; + table->db= log_thd->db; + table->db_length= log_thd->db_length; + + if (simple_open_n_lock_tables(log_thd, table) || + table->table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) || + table->table->file->ha_rnd_init(0)) + error= TRUE; + else + table->table->locked_by_logger= TRUE; + + /* restore thread settings */ + if (curr) + curr->store_globals(); + else + { + my_pthread_setspecific_ptr(THR_THD, 0); + my_pthread_setspecific_ptr(THR_MALLOC, 0); + } + + DBUG_RETURN(error); +} + + +Log_to_csv_event_handler::Log_to_csv_event_handler() +{ + /* init artificial THD's */ + general_log_thd= new THD; + /* logger thread always works with mysql database */ + general_log_thd->db= my_strdup("mysql", MYF(0)); + general_log_thd->db_length= 5; + + slow_log_thd= new THD; + /* logger thread always works with mysql database */ + slow_log_thd->db= my_strdup("mysql", MYF(0)); + slow_log_thd->db_length= 5; +} + + +Log_to_csv_event_handler::~Log_to_csv_event_handler() +{ + /* now cleanup the tables */ + if (general_log_thd) + { + delete general_log_thd; + general_log_thd= NULL; + } + + if (slow_log_thd) + { + delete slow_log_thd; + slow_log_thd= NULL; + } +} + + +/* + Reopen log table of a given type + + SYNOPSIS + reopen_log_table() + + log_type type of the log table to open: QUERY_LOG_GENERAL + or QUERY_LOG_SLOW + + DESCRIPTION + + The function is a wrapper around open_log_table(). It is used during + FLUSH LOGS and TRUNCATE of the log tables (i.e. when we need to close + and reopen them). The difference is in the check of the + logger.is_log_tables_initialized var, which can't be done in + open_log_table(), as it makes no sense during startup.
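Both open_log_table() above and close_log_table() later in this file bracket their table work with the same THD juggling: remember current_thd, make the logger THD current via store_globals(), then restore it (or clear the thread-local slots when there was no caller THD). As a reading aid only, the pattern could be captured in a small RAII guard; this is an illustrative sketch, not code from the patch:

  /* Sketch only: scoped THD switch mirroring the code above */
  class Thd_swap_guard
  {
    THD *m_saved;                        // THD active before the switch
  public:
    Thd_swap_guard(THD *log_thd) : m_saved(current_thd)
    {
      log_thd->store_globals();          // make the logger THD current
    }
    ~Thd_swap_guard()
    {
      if (m_saved)
        m_saved->store_globals();        // restore the caller's THD
      else
      {
        /* no caller THD (startup/shutdown): clear the thread-local slots */
        my_pthread_setspecific_ptr(THR_THD, 0);
        my_pthread_setspecific_ptr(THR_MALLOC, 0);
      }
    }
  };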
+ + NOTE: this code assumes that we have the logger mutex locked + + RETURN + FALSE - ok + TRUE - open_log_table() returned an error +*/ + +bool Log_to_csv_event_handler::reopen_log_table(uint log_type) +{ + /* don't open the log table, if it wasn't enabled during startup */ + if (!logger.is_log_tables_initialized) + return FALSE; + return open_log_table(log_type); +} + +void Log_to_csv_event_handler::cleanup() +{ + close_log_table(QUERY_LOG_GENERAL, FALSE); + close_log_table(QUERY_LOG_SLOW, FALSE); + logger.is_log_tables_initialized= FALSE; +} + +/* log event handlers */ + +/* + Log command to the general log table + + SYNOPSIS + log_general() + + event_time command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + thread_id Id of the thread that issued the query + command_type the type of the command being logged + command_type_len the length of the string above + sql_text the very text of the query being executed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log the given command to the general log table + + RETURN + FALSE - OK + TRUE - error occurred +*/ + +bool Log_to_csv_event_handler:: + log_general(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs) +{ + TABLE *table= general_log.table; + + /* below should never happen */ + if (unlikely(!logger.is_log_tables_initialized)) + return FALSE; + + /* log table entries are not replicated at the moment */ + tmp_disable_binlog(current_thd); + + general_log_thd->start_time= event_time; + /* set default value (which is CURRENT_TIMESTAMP) */ + table->field[0]->set_null(); + + table->field[1]->store(user_host, user_host_len, client_cs); + table->field[2]->store((longlong) thread_id); + table->field[3]->store((longlong) server_id); + table->field[4]->store(command_type, command_type_len, client_cs); + table->field[5]->store(sql_text, sql_text_len, client_cs); + table->file->ha_write_row(table->record[0]); + + reenable_binlog(current_thd); + + return FALSE; +} + + +/* + Log a query to the slow log table + + SYNOPSIS + log_slow() + thd THD of the query + current_time current timestamp + query_start_arg command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string.
this is computed once + and passed to all general log event handlers + query_time Amount of time the query took to execute (in seconds) + lock_time Amount of time the query was locked (in seconds) + is_command The flag, which determines, whether the sql_text is a + query or an administrator command (these are treated + differently by the old logging routines) + sql_text the very text of the query or administrator command + processed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log a query to the slow log table + + RETURN + FALSE - OK + TRUE - error occured +*/ + +bool Log_to_csv_event_handler:: + log_slow(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + longlong query_time, longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len) +{ + /* table variables */ + TABLE *table= slow_log.table; + CHARSET_INFO *client_cs= thd->variables.character_set_client; + + DBUG_ENTER("log_slow_to_csv"); + + /* below should never happen */ + if (unlikely(!logger.is_log_tables_initialized)) + return FALSE; + + /* log table entries are not replicated at the moment */ + tmp_disable_binlog(current_thd); + + /* + Set start time for CURRENT_TIMESTAMP to the start of the query. + This will be default value for the field + */ + slow_log_thd->start_time= query_start_arg; + + /* set default value (which is CURRENT_TIMESTAMP) */ + table->field[0]->set_null(); + + /* store the value */ + table->field[1]->store(user_host, user_host_len, client_cs); + + if (query_start_arg) + { + /* fill in query_time field */ + table->field[2]->store(query_time); + /* lock_time */ + table->field[3]->store(lock_time); + /* rows_sent */ + table->field[4]->store((longlong) thd->sent_row_count); + /* rows_examined */ + table->field[5]->store((longlong) thd->examined_row_count); + } + else + { + table->field[2]->set_null(); + table->field[3]->set_null(); + table->field[4]->set_null(); + table->field[5]->set_null(); + } + + if (thd->db) + /* fill database field */ + table->field[6]->store(thd->db, thd->db_length, client_cs); + else + table->field[6]->set_null(); + + if (thd->last_insert_id_used) + table->field[7]->store((longlong) thd->current_insert_id); + else + table->field[7]->set_null(); + + /* set value if we do an insert on autoincrement column */ + if (thd->insert_id_used) + table->field[8]->store((longlong) thd->last_insert_id); + else + table->field[8]->set_null(); + + table->field[9]->store((longlong) server_id); + + /* sql_text */ + table->field[10]->store(sql_text,sql_text_len, client_cs); + + /* write the row */ + table->file->ha_write_row(table->record[0]); + + reenable_binlog(current_thd); + + DBUG_RETURN(0); +} + +bool Log_to_csv_event_handler:: + log_error(enum loglevel level, const char *format, va_list args) +{ + /* No log table is implemented */ + DBUG_ASSERT(0); + return FALSE; +} + +bool Log_to_file_event_handler:: + log_error(enum loglevel level, const char *format, + va_list args) +{ + return vprint_msg_to_log(level, format, args); +} + +void Log_to_file_event_handler::init_pthread_objects() +{ + mysql_log.init_pthread_objects(); + mysql_slow_log.init_pthread_objects(); +} + + +/* Wrapper around MYSQL_LOG::write() for slow log */ + +bool Log_to_file_event_handler:: + log_slow(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + longlong query_time, longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len) +{ + return mysql_slow_log.write(thd, current_time, 
query_start_arg, + user_host, user_host_len, + query_time, lock_time, is_command, + sql_text, sql_text_len); +} + + +/* + Wrapper around MYSQL_LOG::write() for general log. We need it since we + want all log event handlers to have the same signature. +*/ + +bool Log_to_file_event_handler:: + log_general(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs) +{ + return mysql_log.write(event_time, user_host, user_host_len, + thread_id, command_type, command_type_len, + sql_text, sql_text_len); +} + + +bool Log_to_file_event_handler::init() +{ + if (!is_initialized) + { + if (opt_slow_log) + mysql_slow_log.open_slow_log(opt_slow_logname); + + if (opt_log) + mysql_log.open_query_log(opt_logname); + + is_initialized= TRUE; + } + + return FALSE; +} + + +void Log_to_file_event_handler::cleanup() +{ + mysql_log.cleanup(); + mysql_slow_log.cleanup(); +} + +void Log_to_file_event_handler::flush() +{ + /* reopen log files */ + mysql_log.new_file(1); + mysql_slow_log.new_file(1); +} + +/* + Log error with all enabled log event handlers + + SYNOPSIS + error_log_print() + + level The level of the error significance: NOTE, + WARNING or ERROR. + format format string for the error message + args list of arguments for the format string + + RETURN + FALSE - OK + TRUE - error occurred +*/ + +bool LOGGER::error_log_print(enum loglevel level, const char *format, + va_list args) +{ + bool error= FALSE; + Log_event_handler **current_handler= error_log_handler_list; + + /* currently we don't need locking here as there is no error_log table */ + while (*current_handler) + error= (*current_handler++)->log_error(level, format, args) || error; + + return error; +} + + +void LOGGER::cleanup_base() +{ + DBUG_ASSERT(inited == 1); + (void) pthread_mutex_destroy(&LOCK_logger); + if (table_log_handler) + { + table_log_handler->cleanup(); + delete table_log_handler; + } + if (file_log_handler) + file_log_handler->cleanup(); +} + + +void LOGGER::cleanup_end() +{ + DBUG_ASSERT(inited == 1); + if (file_log_handler) + delete file_log_handler; +} + + +void LOGGER::close_log_table(uint log_type, bool lock_in_use) +{ + table_log_handler->close_log_table(log_type, lock_in_use); +} + + +/* + Perform basic log initialization: create file-based log handler and + init error log. +*/ +void LOGGER::init_base() +{ + DBUG_ASSERT(inited == 0); + inited= 1; + + /* + Here we create the file log handler. We don't do it for the table log + handler here as it cannot be created so early. The reason is THD + initialization, which depends on the system variables (parsed later).
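error_log_print() above and the slow/general variants below all drive the same structure: each log kind owns a small NULL-terminated array of Log_event_handler pointers, filled in by init_error_log() and friends according to the LOG_FILE/LOG_TABLE routing. Restated in isolation, as a simplified sketch of the loop above rather than new code:

  /* Sketch: the dispatch loop shared by the LOGGER::*_print() methods */
  static bool dispatch_error(Log_event_handler **list, enum loglevel level,
                             const char *format, va_list args)
  {
    bool error= FALSE;
    while (*list)                    /* handler lists are NULL-terminated */
      error= (*list++)->log_error(level, format, args) || error;
    return error;                    /* TRUE if any handler reported failure */
  }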
+ */ + if (!file_log_handler) + file_log_handler= new Log_to_file_event_handler; + + /* by default we use traditional error log */ + init_error_log(LOG_FILE); + + file_log_handler->init_pthread_objects(); + (void) pthread_mutex_init(&LOCK_logger, MY_MUTEX_INIT_SLOW); +} + + +void LOGGER::init_log_tables() +{ + if (!table_log_handler) + table_log_handler= new Log_to_csv_event_handler; + + if (!is_log_tables_initialized && + !table_log_handler->init() && !file_log_handler->init()) + is_log_tables_initialized= TRUE; +} + + +bool LOGGER::reopen_log_table(uint log_type) +{ + return table_log_handler->reopen_log_table(log_type); +} + + +bool LOGGER::flush_logs(THD *thd) +{ + TABLE_LIST close_slow_log, close_general_log; + + /* reopen log tables */ + bzero((char*) &close_slow_log, sizeof(TABLE_LIST)); + close_slow_log.alias= close_slow_log.table_name=(char*) "slow_log"; + close_slow_log.table_name_length= 8; + close_slow_log.db= (char*) "mysql"; + close_slow_log.db_length= 5; + + bzero((char*) &close_general_log, sizeof(TABLE_LIST)); + close_general_log.alias= close_general_log.table_name=(char*) "general_log"; + close_general_log.table_name_length= 11; + close_general_log.db= (char*) "mysql"; + close_general_log.db_length= 5; + + /* reopen log files */ + file_log_handler->flush(); + + /* + this will lock and wait for all but the logger thread to release the + tables. Then we could reopen log tables. Then release the name locks. + */ + lock_and_wait_for_table_name(thd, &close_slow_log); + lock_and_wait_for_table_name(thd, &close_general_log); + + /* deny others from logging to general and slow log, while reopening tables */ + logger.lock(); + + table_log_handler->flush(thd, &close_slow_log, &close_general_log); + + /* end of log tables flush */ + logger.unlock(); + return FALSE; +} + + +/* + Log slow query with all enabled log event handlers + + SYNOPSIS + slow_log_print() + + thd THD of the query being logged + query The query being logged + query_length The length of the query string + query_start_arg Query start timestamp + + RETURN + FALSE - OK + TRUE - error occured +*/ + +bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length, + time_t query_start_arg) +{ + bool error= FALSE; + Log_event_handler **current_handler= slow_log_handler_list; + bool is_command= FALSE; + + char message_buff[MAX_LOG_BUFFER_SIZE]; + char user_host_buff[MAX_USER_HOST_SIZE]; + + my_time_t current_time; + Security_context *sctx= thd->security_ctx; + uint message_buff_len= 0, user_host_len= 0; + longlong query_time= 0, lock_time= 0; + longlong last_insert_id= 0, insert_id= 0; + + /* + Print the message to the buffer if we have slow log enabled + */ + + if (*slow_log_handler_list) + { + current_time= time(NULL); + + if (!(thd->options & OPTION_UPDATE_LOG)) + return 0; + + lock(); + + /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */ + user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE, + sctx->priv_user ? sctx->priv_user : "", "[", + sctx->user ? sctx->user : "", "] @ ", + sctx->host ? sctx->host : "", " [", + sctx->ip ? 
sctx->ip : "", "]", NullS) - + user_host_buff; + + if (query_start_arg) + { + query_time= (longlong) (current_time - query_start_arg); + lock_time= (longlong) (thd->time_after_lock - query_start_arg); + } + + if (thd->last_insert_id_used) + last_insert_id= (longlong) thd->current_insert_id; + + /* set value if we do an insert on autoincrement column */ + if (thd->insert_id_used) + insert_id= (longlong) thd->last_insert_id; + + if (!query) + { + is_command= TRUE; + query= command_name[thd->command].str; + query_length= command_name[thd->command].length; + } + + while (*current_handler) + error= (*current_handler++)->log_slow(thd, current_time, query_start_arg, + user_host_buff, user_host_len, + query_time, lock_time, is_command, + query, query_length) || error; + + unlock(); + } + return error; +} + +bool LOGGER::general_log_print(THD *thd, enum enum_server_command command, + const char *format, va_list args) +{ + bool error= FALSE; + Log_event_handler **current_handler= general_log_handler_list; + + /* + Print the message to the buffer if we have at least one log event handler + enabled and want to log this king of commands + */ + if (*general_log_handler_list && (what_to_log & (1L << (uint) command))) + { + char message_buff[MAX_LOG_BUFFER_SIZE]; + char user_host_buff[MAX_USER_HOST_SIZE]; + Security_context *sctx= thd->security_ctx; + ulong id; + uint message_buff_len= 0, user_host_len= 0; + + if (thd) + { /* Normal thread */ + if ((thd->options & OPTION_LOG_OFF) +#ifndef NO_EMBEDDED_ACCESS_CHECKS + && (sctx->master_access & SUPER_ACL) +#endif + ) + { + return 0; /* No logging */ + } + id= thd->thread_id; + } + else + id=0; /* Log from connect handler */ + + lock(); + time_t current_time= time(NULL); + + user_host_len= strxnmov(user_host_buff, MAX_USER_HOST_SIZE, + sctx->priv_user ? sctx->priv_user : "", "[", + sctx->user ? sctx->user : "", "] @ ", + sctx->host ? sctx->host : "", " [", + sctx->ip ? 
sctx->ip : "", "]", NullS) - + user_host_buff; + + /* prepare message */ + if (format) + message_buff_len= my_vsnprintf(message_buff, + sizeof(message_buff), format, args); + else + message_buff[0]= '\0'; + + while (*current_handler) + error+= (*current_handler++)-> + log_general(current_time, user_host_buff, + user_host_len, id, + command_name[(uint) command].str, + command_name[(uint) command].length, + message_buff, message_buff_len, + thd->variables.character_set_client) || error; + unlock(); + } + return error; +} + +void LOGGER::init_error_log(uint error_log_printer) +{ + if (error_log_printer & LOG_NONE) + { + error_log_handler_list[0]= 0; + return; + } + + switch (error_log_printer) { + case LOG_FILE: + error_log_handler_list[0]= file_log_handler; + error_log_handler_list[1]= 0; + break; + /* these two are disabled for now */ + case LOG_TABLE: + DBUG_ASSERT(0); + break; + case LOG_TABLE|LOG_FILE: + DBUG_ASSERT(0); + break; + } +} + +void LOGGER::init_slow_log(uint slow_log_printer) +{ + if (slow_log_printer & LOG_NONE) + { + slow_log_handler_list[0]= 0; + return; + } + + switch (slow_log_printer) { + case LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE: + slow_log_handler_list[0]= table_log_handler; + slow_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + slow_log_handler_list[0]= file_log_handler; + slow_log_handler_list[1]= table_log_handler; + slow_log_handler_list[2]= 0; + break; + } +} + +void LOGGER::init_general_log(uint general_log_printer) +{ + if (general_log_printer & LOG_NONE) + { + general_log_handler_list[0]= 0; + return; + } + + switch (general_log_printer) { + case LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE: + general_log_handler_list[0]= table_log_handler; + general_log_handler_list[1]= 0; + break; + case LOG_TABLE|LOG_FILE: + general_log_handler_list[0]= file_log_handler; + general_log_handler_list[1]= table_log_handler; + general_log_handler_list[2]= 0; + break; + } +} + + +bool Log_to_csv_event_handler::flush(THD *thd, TABLE_LIST *close_slow_log, + TABLE_LIST *close_general_log) +{ + VOID(pthread_mutex_lock(&LOCK_open)); + close_log_table(QUERY_LOG_GENERAL, TRUE); + close_log_table(QUERY_LOG_SLOW, TRUE); + close_general_log->next_local= close_slow_log; + query_cache_invalidate3(thd, close_general_log, 0); + unlock_table_name(thd, close_slow_log); + unlock_table_name(thd, close_general_log); + VOID(pthread_mutex_unlock(&LOCK_open)); + return reopen_log_table(QUERY_LOG_SLOW) || + reopen_log_table(QUERY_LOG_GENERAL); +} + +/* the parameters are unused for the log tables */ +bool Log_to_csv_event_handler::init() +{ + /* we always open log tables. even if the logging is disabled */ + return (open_log_table(QUERY_LOG_GENERAL) || open_log_table(QUERY_LOG_SLOW)); +} + +int LOGGER::set_handlers(uint error_log_printer, + uint slow_log_printer, + uint general_log_printer) +{ + /* error log table is not supported yet */ + DBUG_ASSERT(error_log_printer < LOG_TABLE); + + lock(); + + if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) && + !is_log_tables_initialized) + { + slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE; + general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE; + + sql_print_error("Failed to initialize log tables. 
" + "Falling back to the old-fashioned logs"); + } + + init_error_log(error_log_printer); + init_slow_log(slow_log_printer); + init_general_log(general_log_printer); + + unlock(); + + return 0; +} + + +/* + Close log table of a given type (general or slow log) + + SYNOPSIS + close_log_table() + + log_type type of the log table to close: QUERY_LOG_GENERAL + or QUERY_LOG_SLOW + lock_in_use Set to TRUE if the caller owns LOCK_open. FALSE otherwise. + + DESCRIPTION + + The function closes a log table. It is invoked (1) when we need to reopen + log tables (e.g. FLUSH LOGS or TRUNCATE on the log table is being + executed) or (2) during shutdown. +*/ + +void Log_to_csv_event_handler:: + close_log_table(uint log_type, bool lock_in_use) +{ + THD *log_thd, *curr= current_thd; + TABLE_LIST *table; + + if (!logger.is_log_tables_initialized) + return; /* do nothing */ + + switch (log_type) { + case QUERY_LOG_GENERAL: + log_thd= general_log_thd; + table= &general_log; + break; + case QUERY_LOG_SLOW: + log_thd= slow_log_thd; + table= &slow_log; + break; + default: + DBUG_ASSERT(0); + } + + /* + Set thread stack start for the logger thread. See comment in + open_log_table() for details. + */ + if (curr) + log_thd->thread_stack= curr->thread_stack; + else + log_thd->thread_stack= (char*) &log_thd; + + /* close the table */ + log_thd->store_globals(); + table->table->file->ha_rnd_end(); + /* discard logger mark before unlock*/ + table->table->locked_by_logger= FALSE; + close_thread_tables(log_thd, lock_in_use); + + if (curr) + curr->store_globals(); + else + { + my_pthread_setspecific_ptr(THR_THD, 0); + my_pthread_setspecific_ptr(THR_MALLOC, 0); + } +} + + /* this function is mostly a placeholder. conceptually, binlog initialization (now mostly done in MYSQL_LOG::open) @@ -83,19 +1024,45 @@ bool binlog_init() static int binlog_close_connection(THD *thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - DBUG_ASSERT(mysql_bin_log.is_open() && !my_b_tell(trans_log)); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; + DBUG_ASSERT(mysql_bin_log.is_open() && trx_data->empty()); close_cached_file(trans_log); - my_free((gptr)trans_log, MYF(0)); + thd->ha_data[binlog_hton.slot]= 0; + my_free((gptr)trx_data, MYF(0)); return 0; } -static int binlog_end_trans(THD *thd, IO_CACHE *trans_log, Log_event *end_ev) +static int +binlog_end_trans(THD *thd, binlog_trx_data *trx_data, Log_event *end_ev) { - int error=0; DBUG_ENTER("binlog_end_trans"); + int error=0; + IO_CACHE *trans_log= &trx_data->trans_log; + if (end_ev) + { + thd->binlog_flush_pending_rows_event(true); error= mysql_bin_log.write(thd, trans_log, end_ev); + } + else + { + thd->binlog_delete_pending_rows_event(); + } + + /* + We need to step the table map version both after writing the + entire transaction to the log file and after rolling back the + transaction. + + We need to step the table map version after writing the + transaction cache to disk. In addition, we need to step the table + map version on a rollback to ensure that a new table map event is + generated instead of the one that was written to the thrown-away + transaction cache. 
+ */ + ++mysql_bin_log.m_table_map_version; statistic_increment(binlog_cache_use, &LOCK_status); if (trans_log->disk_writes != 0) @@ -121,33 +1088,37 @@ static int binlog_prepare(THD *thd, bool all) static int binlog_commit(THD *thd, bool all) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_commit"); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; DBUG_ASSERT(mysql_bin_log.is_open() && (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))); - if (!my_b_tell(trans_log)) + if (trx_data->empty()) { // we're here because trans_log was flushed in MYSQL_LOG::log() DBUG_RETURN(0); } Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE); qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE) - DBUG_RETURN(binlog_end_trans(thd, trans_log, &qev)); + DBUG_RETURN(binlog_end_trans(thd, trx_data, &qev)); } static int binlog_rollback(THD *thd, bool all) { - int error=0; - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_rollback"); + int error=0; + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; /* First assert is guaranteed - see trans_register_ha() call below. The second must be true. If it is not, we're registering unnecessary, doing extra work. The cause should be found and eliminated */ DBUG_ASSERT(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); - DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + DBUG_ASSERT(mysql_bin_log.is_open() && !trx_data->empty()); /* Update the binary log with a BEGIN/ROLLBACK block if we have cached some queries and we updated some non-transactional @@ -158,10 +1129,10 @@ static int binlog_rollback(THD *thd, bool all) { Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE); qev.error_code= 0; // see comment in MYSQL_LOG::write(THD, IO_CACHE) - error= binlog_end_trans(thd, trans_log, &qev); + error= binlog_end_trans(thd, trx_data, &qev); } else - error= binlog_end_trans(thd, trans_log, 0); + error= binlog_end_trans(thd, trx_data, 0); DBUG_RETURN(error); } @@ -188,20 +1159,26 @@ static int binlog_rollback(THD *thd, bool all) static int binlog_savepoint_set(THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_set"); - DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(&trx_data->trans_log)); - *(my_off_t *)sv= my_b_tell(trans_log); + *(my_off_t *)sv= my_b_tell(&trx_data->trans_log); /* Write it to the binary log */ - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + + int const error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } static int binlog_savepoint_rollback(THD *thd, void *sv) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; DBUG_ENTER("binlog_savepoint_rollback"); + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); /* @@ -211,8 +1188,10 @@ static int binlog_savepoint_rollback(THD *thd, void *sv) */ if (unlikely(thd->options & 
OPTION_STATUS_NO_TRANS_UPDATE)) { - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - DBUG_RETURN(mysql_bin_log.write(&qinfo)); + int const error= + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(error); } reinit_io_cache(trans_log, WRITE_CACHE, *(my_off_t *)sv, 0, 0); DBUG_RETURN(0); @@ -360,6 +1339,7 @@ MYSQL_LOG::MYSQL_LOG() :bytes_written(0), last_time(0), query_start(0), name(0), prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1), write_error(FALSE), inited(FALSE), need_start_event(TRUE), + m_table_map_version(0), description_event_for_exec(0), description_event_for_queue(0) { /* @@ -928,6 +1908,7 @@ bool MYSQL_LOG::reset_logs(THD* thd) enum_log_type save_log_type; DBUG_ENTER("reset_logs"); + ha_reset_logs(thd); /* We need to get both locks to be sure that no one is trying to write to the index log file. @@ -1177,6 +2158,9 @@ int MYSQL_LOG::purge_logs(const char *to_log, DBUG_PRINT("info",("purging %s",log_info.log_file_name)); if (!my_delete(log_info.log_file_name, MYF(0)) && decrease_log_space) *decrease_log_space-= file_size; + + ha_binlog_index_purge_file(current_thd, log_info.log_file_name); + if (find_next_log(&log_info, 0) || exit_loop) break; } @@ -1237,6 +2221,9 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) stat_area.st_mtime >= purge_time) break; my_delete(log_info.log_file_name, MYF(0)); + + ha_binlog_index_purge_file(current_thd, log_info.log_file_name); + if (find_next_log(&log_info, 0)) break; } @@ -1356,7 +2343,7 @@ void MYSQL_LOG::new_file(bool need_lock) to change base names at some point. */ THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */ - Rotate_log_event r(thd,new_name+dirname_length(new_name), + Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET, 0); r.write(&log_file); bytes_written += r.data_written; @@ -1458,95 +2445,97 @@ err: /* - Write to normal (not rotable) log - This is the format for the 'normal' log. + Write a command to the traditional general log file + + SYNOPSIS + write() + + event_time command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + thread_id Id of the thread that issued the query + command_type the type of the command being logged + command_type_len the length of the string above + sql_text the very text of the query being executed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log the given command to the normal (not rotatable) log file + + RETURN + FALSE - OK + TRUE - error occurred */ -bool MYSQL_LOG::write(THD *thd,enum enum_server_command command, - const char *format,...)
+bool MYSQL_LOG::write(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len) { - if (is_open() && (what_to_log & (1L << (uint) command))) + char buff[32]; + uint length= 0; + char time_buff[MAX_TIME_SIZE]; + struct tm start; + uint time_buff_len= 0; + + /* Test if someone closed between the is_open test and lock */ + if (is_open()) { - uint length; - int error= 0; - VOID(pthread_mutex_lock(&LOCK_log)); + /* Note that my_b_write() assumes it knows the length for this */ + if (event_time != last_time) + { + last_time= event_time; - /* Test if someone closed between the is_open test and lock */ - if (is_open()) - { - time_t skr; - ulong id; - va_list args; - va_start(args,format); - char buff[32]; - - if (thd) - { // Normal thread - if ((thd->options & OPTION_LOG_OFF) -#ifndef NO_EMBEDDED_ACCESS_CHECKS - && (thd->security_ctx->master_access & SUPER_ACL) -#endif -) - { - VOID(pthread_mutex_unlock(&LOCK_log)); - return 0; // No logging - } - id=thd->thread_id; - if (thd->user_time || !(skr=thd->query_start())) - skr=time(NULL); // Connected + localtime_r(&event_time, &start); + + time_buff_len= my_snprintf(time_buff, MAX_TIME_SIZE, + "%02d%02d%02d %2d:%02d:%02d", + start.tm_year % 100, start.tm_mon + 1, + start.tm_mday, start.tm_hour, + start.tm_min, start.tm_sec); + + if (my_b_write(&log_file, (byte*) &time_buff, time_buff_len)) + goto err; } else - { // Log from connect handler - skr=time(NULL); - id=0; - } - if (skr != last_time) - { - last_time=skr; - struct tm tm_tmp; - struct tm *start; - localtime_r(&skr,&tm_tmp); - start=&tm_tmp; - /* Note that my_b_write() assumes it knows the length for this */ - sprintf(buff,"%02d%02d%02d %2d:%02d:%02d\t", - start->tm_year % 100, - start->tm_mon+1, - start->tm_mday, - start->tm_hour, - start->tm_min, - start->tm_sec); - if (my_b_write(&log_file, (byte*) buff,16)) - error=errno; - } - else if (my_b_write(&log_file, (byte*) "\t\t",2) < 0) - error=errno; - length=my_sprintf(buff, - (buff, "%7ld %-11.11s", id, - command_name[(uint) command])); - if (my_b_write(&log_file, (byte*) buff,length)) - error=errno; - if (format) - { - if (my_b_write(&log_file, (byte*) " ",1) || - my_b_vprintf(&log_file,format,args) == (uint) -1) - error=errno; - } - if (my_b_write(&log_file, (byte*) "\n",1) || - flush_io_cache(&log_file)) - error=errno; - if (error && ! 
write_error) - { - write_error=1; - sql_print_error(ER(ER_ERROR_ON_WRITE),name,error); - } - va_end(args); - } - VOID(pthread_mutex_unlock(&LOCK_log)); - return error != 0; + if (my_b_write(&log_file, (byte*) "\t\t" ,2) < 0) + goto err; + + /* command_type, thread_id */ + length= my_snprintf(buff, 32, "%5ld ", thread_id); + + if (my_b_write(&log_file, (byte*) buff, length)) + goto err; + + if (my_b_write(&log_file, (byte*) command_type, command_type_len)) + goto err; + + if (my_b_write(&log_file, (byte*) "\t", 1)) + goto err; + + /* sql_text */ + if (my_b_write(&log_file, (byte*) sql_text, sql_text_len)) + goto err; + + if (my_b_write(&log_file, (byte*) "\n", 1) || + flush_io_cache(&log_file)) + goto err; } - return 0; + + return FALSE; +err: + + if (!write_error) + { + write_error= 1; + sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + } + return TRUE; } + bool MYSQL_LOG::flush_and_sync() { int err=0, fd=log_file.file; @@ -1582,6 +2571,162 @@ bool MYSQL_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) query_id_param >= thd->binlog_evt_union.first_query_id); } + +/* + These functions are placed in this file since they need access to + binlog_hton, which has internal linkage. +*/ + +int THD::binlog_setup_trx_data() +{ + DBUG_ENTER("THD::binlog_setup_trx_data"); + binlog_trx_data *trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + + if (trx_data) + DBUG_RETURN(0); // Already set up + + ha_data[binlog_hton.slot]= trx_data= + (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL)); + if (!trx_data || + open_cached_file(&trx_data->trans_log, mysql_tmpdir, + LOG_PREFIX, binlog_cache_size, MYF(MY_WME))) + { + my_free((gptr)trx_data, MYF(MY_ALLOW_ZERO_PTR)); + ha_data[binlog_hton.slot]= 0; + DBUG_RETURN(1); // Didn't manage to set it up + } + trx_data->trans_log.end_of_file= max_binlog_cache_size; + DBUG_RETURN(0); +} + +Rows_log_event* +THD::binlog_get_pending_rows_event() const +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + /* + This is less than ideal, but here's the story: If there is no + trx_data, prepare_pending_rows_event() has never been called + (since the trx_data is set up there). In that case, we just return + NULL. + */ + return trx_data ? trx_data->pending : NULL; +} + +void +THD::binlog_set_pending_rows_event(Rows_log_event* ev) +{ + binlog_trx_data *const trx_data= + (binlog_trx_data*) ha_data[binlog_hton.slot]; + DBUG_ASSERT(trx_data); + trx_data->pending= ev; +} + + +/* + Moves the last bunch of rows from the pending Rows event to the binlog + (either cached binlog if transaction, or disk binlog). Sets a new pending + event. +*/ +int MYSQL_LOG::flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event) +{ + DBUG_ENTER("MYSQL_LOG::flush_and_set_pending_rows_event(event)"); + DBUG_ASSERT(thd->current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_PRINT("enter", ("event=%p", event)); + + int error= 0; + + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + + DBUG_ASSERT(trx_data); + + if (Rows_log_event* pending= trx_data->pending) + { + IO_CACHE *file= &log_file; + + /* + Decide if we should write to the log file directly or to the + transaction log. + */ + if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log)) + file= &trx_data->trans_log; + + /* + If we are writing to the log file directly, we could avoid + locking the log. 
This does not work since we need to step the + m_table_map_version below, and that change has to be protected + by the LOCK_log mutex. + */ + pthread_mutex_lock(&LOCK_log); + + /* + Write a table map if necessary + */ + if (pending->maybe_write_table_map(thd, file, this)) + { + pthread_mutex_unlock(&LOCK_log); + DBUG_RETURN(2); + } + + /* + Write pending event to log file or transaction cache + */ + if (pending->write(file)) + { + pthread_mutex_unlock(&LOCK_log); + DBUG_RETURN(1); + } + + /* + We step the table map version if we are writing an event + representing the end of a statement. We do this regardless of + whether we write to the transaction cache or directly to the + file. + + In an ideal world, we could avoid stepping the table map version + if we were writing to a transaction cache, since we could then + reuse the table map that was written earlier in the transaction + cache. This does not work since STMT_END_F implies closing all + table mappings on the slave side. + + TODO: Find a solution so that table maps do not have to be + written several times within a transaction. + */ + if (pending->get_flags(Rows_log_event::STMT_END_F)) + ++m_table_map_version; + + delete pending; + + if (file == &log_file) + { + error= flush_and_sync(); + if (!error) + { + signal_update(); + rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); + } + } + + pthread_mutex_unlock(&LOCK_log); + } + else if (event && event->get_cache_stmt()) /* && pending == 0 */ + { + /* + If we are setting a non-null event for a table that is + transactional, we start a transaction here as well. + */ + trans_register_ha(thd, + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN), + &binlog_hton); + } + + trx_data->pending= event; + + DBUG_RETURN(error); +} + /* Write an event to the binary log */ @@ -1602,7 +2747,29 @@ bool MYSQL_LOG::write(Log_event *event_info) thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt; DBUG_RETURN(0); } - + + /* + Flush the pending rows event to the transaction cache or to the + log file. Since that function potentially acquires the LOCK_log + mutex, we do this before acquiring the LOCK_log mutex in this + function. + + This is not optimal, but necessary in the current implementation + since there is code that writes rows to system tables without + using some way to flush the pending event (e.g., binlog_query()). + + TODO: There shall be no writes to any system table after calling + binlog_query(), so these writes have to be moved to before the call + of binlog_query() for correct functioning. + + This is necessary not only for RBR, but the master might crash + after binlogging the query but before changing the system tables. + This means that the slave and the master are not in the same state + (after the master has restarted), therefore we have to + eliminate this problem. + */ + thd->binlog_flush_pending_rows_event(true); + pthread_mutex_lock(&LOCK_log); /* @@ -1621,10 +2788,11 @@ bool MYSQL_LOG::write(Log_event *event_info) binlog_[wild_]{do|ignore}_table?"
(WL#1049)" */ if ((thd && !(thd->options & OPTION_BIN_LOG)) || - (!db_ok(local_db, binlog_do_db, binlog_ignore_db))) + (!binlog_filter->db_ok(local_db))) { VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_PRINT("error",("!db_ok('%s')", local_db)); + DBUG_PRINT("info",("db_ok('%s')==%d", local_db, + binlog_filter->db_ok(local_db))); DBUG_RETURN(0); } #endif /* HAVE_REPLICATION */ @@ -1641,37 +2809,26 @@ bool MYSQL_LOG::write(Log_event *event_info) */ if (opt_using_transactions && thd) { - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + if (thd->binlog_setup_trx_data()) + goto err; - if (event_info->get_cache_stmt()) - { - if (!trans_log) - { - thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *) - my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL)); - if (!trans_log || open_cached_file(trans_log, mysql_tmpdir, - LOG_PREFIX, - binlog_cache_size, MYF(MY_WME))) - { - my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR)); - thd->ha_data[binlog_hton.slot]= trans_log= 0; - goto err; - } - trans_log->end_of_file= max_binlog_cache_size; - trans_register_ha(thd, - thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN), - &binlog_hton); - } - else if (!my_b_tell(trans_log)) - trans_register_ha(thd, - thd->options & (OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN), - &binlog_hton); - file= trans_log; - } - else if (trans_log && my_b_tell(trans_log)) + binlog_trx_data *const trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + IO_CACHE *trans_log= &trx_data->trans_log; + + if (event_info->get_cache_stmt() && !my_b_tell(trans_log)) + trans_register_ha(thd, + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN), + &binlog_hton); + + if (event_info->get_cache_stmt() || my_b_tell(trans_log)) file= trans_log; + /* + Note: as Mats suggested, for all the cases above where we write to + trans_log, it sounds unnecessary to lock LOCK_log. We should rather + test first if we want to write to trans_log, and if not, lock + LOCK_log. TODO. + */ } #endif DBUG_PRINT("info",("event type=%d",event_info->get_type_code())); @@ -1686,42 +2843,49 @@ bool MYSQL_LOG::write(Log_event *event_info) of the SQL command */ + /* + If row-based binlogging, Insert_id, Rand and other kind of "setting + context" events are not needed. 
+ */ if (thd) { - if (thd->last_insert_id_used) - { - Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, - thd->current_insert_id); - if (e.write(file)) - goto err; - } - if (thd->insert_id_used) + if (!thd->current_stmt_binlog_row_based) { - Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); - if (e.write(file)) - goto err; - } - if (thd->rand_used) - { - Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); - if (e.write(file)) - goto err; - } - if (thd->user_var_events.elements) - { - for (uint i= 0; i < thd->user_var_events.elements; i++) - { - BINLOG_USER_VAR_EVENT *user_var_event; - get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i); - User_var_log_event e(thd, user_var_event->user_var_event->name.str, - user_var_event->user_var_event->name.length, - user_var_event->value, - user_var_event->length, - user_var_event->type, - user_var_event->charset_number); - if (e.write(file)) - goto err; - } + if (thd->last_insert_id_used) + { + Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT, + thd->current_insert_id); + if (e.write(file)) + goto err; + } + if (thd->insert_id_used) + { + Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id); + if (e.write(file)) + goto err; + } + if (thd->rand_used) + { + Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2); + if (e.write(file)) + goto err; + } + if (thd->user_var_events.elements) + { + for (uint i= 0; i < thd->user_var_events.elements; i++) + { + BINLOG_USER_VAR_EVENT *user_var_event; + get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i); + User_var_log_event e(thd, user_var_event->user_var_event->name.str, + user_var_event->user_var_event->name.length, + user_var_event->value, + user_var_event->length, + user_var_event->type, + user_var_event->charset_number); + if (e.write(file)) + goto err; + } + } } } @@ -1752,10 +2916,41 @@ err: } } + if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F) + ++m_table_map_version; + pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); } + +int error_log_print(enum loglevel level, const char *format, + va_list args) +{ + return logger.error_log_print(level, format, args); +} + + +bool slow_log_print(THD *thd, const char *query, uint query_length, + time_t query_start_arg) +{ + return logger.slow_log_print(thd, query, query_length, query_start_arg); +} + + +bool general_log_print(THD *thd, enum enum_server_command command, + const char *format, ...) +{ + va_list args; + uint error= 0; + + va_start(args, format); + error= logger.general_log_print(thd, command, format, args); + va_end(args); + + return error; +} + void MYSQL_LOG::rotate_and_purge(uint flags) { if (!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED)) @@ -1902,71 +3097,86 @@ err: /* - Write to the slow query log. + Log a query to the traditional slow log file + + SYNOPSIS + write() + + thd THD of the query + current_time current timestamp + query_start_arg command start timestamp + user_host the pointer to the string with user@host info + user_host_len length of the user_host string. this is computed once + and passed to all general log event handlers + query_time Amount of time the query took to execute (in seconds) + lock_time Amount of time the query was locked (in seconds) + is_command The flag, which determines, whether the sql_text is a + query or an administrator command. + sql_text the very text of the query or administrator command + processed + sql_text_len the length of sql_text string + + DESCRIPTION + + Log a query to the slow log file. 
+ + RETURN + FALSE - OK + TRUE - error occurred */ -bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, - time_t query_start_arg) +bool MYSQL_LOG::write(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + longlong query_time, longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len) { - bool error=0; - time_t current_time; - if (!is_open()) - return 0; + bool error= 0; DBUG_ENTER("MYSQL_LOG::write"); - VOID(pthread_mutex_lock(&LOCK_log)); + if (!is_open()) + DBUG_RETURN(0); + if (is_open()) { // Safety against reopen - int tmp_errno=0; - char buff[80],*end; - end=buff; - if (!(thd->options & OPTION_UPDATE_LOG)) - { - VOID(pthread_mutex_unlock(&LOCK_log)); - DBUG_RETURN(0); - } - if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT) || query_start_arg) + int tmp_errno= 0; + char buff[80], *end; + uint buff_len; + end= buff; + + if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT)) { Security_context *sctx= thd->security_ctx; - current_time=time(NULL); if (current_time != last_time) { - last_time=current_time; - struct tm tm_tmp; - struct tm *start; - localtime_r(&current_time,&tm_tmp); - start=&tm_tmp; + last_time= current_time; + struct tm start; + localtime_r(&current_time, &start); + + buff_len= my_snprintf(buff, sizeof buff, + "# Time: %02d%02d%02d %2d:%02d:%02d\n", + start.tm_year % 100, start.tm_mon + 1, + start.tm_mday, start.tm_hour, + start.tm_min, start.tm_sec); + /* Note that my_b_write() assumes it knows the length for this */ - sprintf(buff,"# Time: %02d%02d%02d %2d:%02d:%02d\n", - start->tm_year % 100, - start->tm_mon+1, - start->tm_mday, - start->tm_hour, - start->tm_min, - start->tm_sec); - if (my_b_write(&log_file, (byte*) buff,24)) + if (my_b_write(&log_file, (byte*) buff, buff_len)) tmp_errno=errno; } - if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n", - sctx->priv_user ? - sctx->priv_user : "", - sctx->user ? sctx->user : "", - sctx->host ? sctx->host : "", - sctx->ip ? sctx->ip : "") == - (uint) -1) + if (my_b_printf(&log_file, "# User@Host: ", sizeof("# User@Host: ") - 1)) tmp_errno=errno; - } - if (query_start_arg) - { - /* For slow query log */ - if (my_b_printf(&log_file, - "# Query_time: %lu Lock_time: %lu Rows_sent: %lu Rows_examined: %lu\n", - (ulong) (current_time - query_start_arg), - (ulong) (thd->time_after_lock - query_start_arg), - (ulong) thd->sent_row_count, - (ulong) thd->examined_row_count) == (uint) -1) + if (my_b_printf(&log_file, user_host, user_host_len)) + tmp_errno=errno; + if (my_b_write(&log_file, (byte*) "\n", 1)) tmp_errno=errno; } + /* For slow query log */ + if (my_b_printf(&log_file, + "# Query_time: %lu Lock_time: %lu" + " Rows_sent: %lu Rows_examined: %lu\n", + (ulong) query_time, (ulong) lock_time, + (ulong) thd->sent_row_count, + (ulong) thd->examined_row_count) == (uint) -1) + tmp_errno=errno; if (thd->db && strcmp(thd->db,db)) { // Database changed if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1) @@ -1987,15 +3197,15 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, end=longlong10_to_str((longlong) thd->last_insert_id,end,-10); } } - if (thd->query_start_used) - { - if (query_start_arg != thd->query_start()) - { - query_start_arg=thd->query_start(); - end=strmov(end,",timestamp="); - end=int10_to_str((long) query_start_arg,end,10); - } - } + + /* + This info used to show up randomly, depending on whether the query + checked the query start time or not.
now we always write current + timestamp to the slow log + */ + end= strmov(end, ",timestamp="); + end= int10_to_str((long) current_time, end, 10); + if (end != buff) { *end++=';'; @@ -2004,14 +3214,13 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, my_b_write(&log_file, (byte*) buff+1,(uint) (end-buff))) tmp_errno=errno; } - if (!query) + if (is_command) { - end=strxmov(buff, "# administrator command: ", - command_name[thd->command], NullS); - query_length=(ulong) (end-buff); - query=buff; + end= strxmov(buff, "# administrator command: ", NullS); + buff_len= (ulong) (end - buff); + my_b_write(&log_file, (byte*) buff, buff_len); } - if (my_b_write(&log_file, (byte*) query,query_length) || + if (my_b_write(&log_file, (byte*) sql_text, sql_text_len) || my_b_write(&log_file, (byte*) ";\n",2) || flush_io_cache(&log_file)) tmp_errno=errno; @@ -2025,7 +3234,6 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, } } } - VOID(pthread_mutex_unlock(&LOCK_log)); DBUG_RETURN(error); } @@ -2218,6 +3426,7 @@ void print_buffer_to_file(enum loglevel level, const char *buffer) skr=time(NULL); localtime_r(&skr, &tm_tmp); start=&tm_tmp; + fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n", start->tm_year % 100, start->tm_mon+1, @@ -2301,6 +3510,44 @@ void MYSQL_LOG::signal_update() DBUG_VOID_RETURN; } +#ifndef MYSQL_CLIENT +bool MYSQL_LOG::write_table_map(THD *thd, IO_CACHE *file, TABLE* table, + bool is_transactional) +{ + DBUG_ENTER("MYSQL_LOG::write_table_map()"); + DBUG_PRINT("enter", ("table=%p (%s: %u)", + table, table->s->table_name, table->s->table_map_id)); + + /* Pre-conditions */ + DBUG_ASSERT(thd->current_stmt_binlog_row_based && is_open()); + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + +#ifndef DBUG_OFF + /* + We only need to execute under the LOCK_log mutex if we are writing + to the log file; otherwise, we are writing to a thread-specific + transaction cache and there is no need to serialize this event + with events in other threads. + */ + if (file == &log_file) + safe_mutex_assert_owner(&LOCK_log); +#endif + + Table_map_log_event::flag_set const + flags= Table_map_log_event::TM_NO_FLAGS; + + Table_map_log_event + the_event(thd, table, table->s->table_map_id, is_transactional, flags); + + if (the_event.write(file)) + DBUG_RETURN(1); + + table->s->table_map_version= m_table_map_version; + DBUG_RETURN(0); +} +#endif /* !defined(MYSQL_CLIENT) */ + + #ifdef __NT__ void print_buffer_to_nt_eventlog(enum loglevel level, char *buff, uint length, int buffLen) @@ -2366,23 +3613,26 @@ void print_buffer_to_nt_eventlog(enum loglevel level, char *buff, to other functions to write that message to other logging sources. RETURN VALUES - void + The function always returns 0. The return value is present in the + signature to be compatible with other logging routines, which could + return an error (e.g. logging to the log tables) */ -void vprint_msg_to_log(enum loglevel level, const char *format, va_list args) +int vprint_msg_to_log(enum loglevel level, const char *format, va_list args) { char buff[1024]; uint length; DBUG_ENTER("vprint_msg_to_log"); - length= my_vsnprintf(buff, sizeof(buff)-5, format, args); + /* "- 5" is because of print_buffer_to_nt_eventlog() */ + length= my_vsnprintf(buff, sizeof(buff) - 5, format, args); print_buffer_to_file(level, buff); #ifdef __NT__ print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff)); #endif - DBUG_VOID_RETURN; + DBUG_RETURN(0); } @@ -2392,7 +3642,7 @@ void sql_print_error(const char *format, ...) 
DBUG_ENTER("sql_print_error"); va_start(args, format); - vprint_msg_to_log(ERROR_LEVEL, format, args); + error_log_print(ERROR_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -2405,7 +3655,7 @@ void sql_print_warning(const char *format, ...) DBUG_ENTER("sql_print_warning"); va_start(args, format); - vprint_msg_to_log(WARNING_LEVEL, format, args); + error_log_print(WARNING_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -2418,7 +3668,7 @@ void sql_print_information(const char *format, ...) DBUG_ENTER("sql_print_information"); va_start(args, format); - vprint_msg_to_log(INFORMATION_LEVEL, format, args); + error_log_print(INFORMATION_LEVEL, format, args); va_end(args); DBUG_VOID_RETURN; @@ -3007,9 +4257,11 @@ void TC_LOG_BINLOG::close() */ int TC_LOG_BINLOG::log(THD *thd, my_xid xid) { + DBUG_ENTER("TC_LOG_BINLOG::log"); Xid_log_event xle(thd, xid); - IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; - return !binlog_end_trans(thd, trans_log, &xle); // invert return value + binlog_trx_data *trx_data= + (binlog_trx_data*) thd->ha_data[binlog_hton.slot]; + DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle)); // invert return value } void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) diff --git a/sql/log.h b/sql/log.h new file mode 100644 index 00000000000..8a83e7b66d0 --- /dev/null +++ b/sql/log.h @@ -0,0 +1,530 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef LOG_H +#define LOG_H + +struct st_relay_log_info; + +class Format_description_log_event; + +/* + Transaction Coordinator log - a base abstract class + for two different implementations +*/ +class TC_LOG +{ + public: + int using_heuristic_recover(); + TC_LOG() {} + virtual ~TC_LOG() {} + + virtual int open(const char *opt_name)=0; + virtual void close()=0; + virtual int log(THD *thd, my_xid xid)=0; + virtual void unlog(ulong cookie, my_xid xid)=0; +}; + +class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging +{ + public: + int open(const char *opt_name) { return 0; } + void close() { } + int log(THD *thd, my_xid xid) { return 1; } + void unlog(ulong cookie, my_xid xid) { } +}; + +#ifdef HAVE_MMAP +class TC_LOG_MMAP: public TC_LOG +{ + public: // only to keep Sun Forte on sol9x86 happy + typedef enum { + POOL, // page is in pool + ERROR, // last sync failed + DIRTY // new xids added since last sync + } PAGE_STATE; + + private: + typedef struct st_page { + struct st_page *next; // page a linked in a fifo queue + my_xid *start, *end; // usable area of a page + my_xid *ptr; // next xid will be written here + int size, free; // max and current number of free xid slots on the page + int waiters; // number of waiters on condition + PAGE_STATE state; // see above + pthread_mutex_t lock; // to access page data or control structure + pthread_cond_t cond; // to wait for a sync + } PAGE; + + char logname[FN_REFLEN]; + File fd; + my_off_t file_length; + uint npages, inited; + uchar *data; + struct st_page *pages, *syncing, *active, *pool, *pool_last; + /* + note that, e.g. LOCK_active is only used to protect + 'active' pointer, to protect the content of the active page + one has to use active->lock. + Same for LOCK_pool and LOCK_sync + */ + pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; + pthread_cond_t COND_pool, COND_active; + + public: + TC_LOG_MMAP(): inited(0) {} + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(); + + private: + void get_active_from_pool(); + int sync(); + int overflow(); +}; +#else +#define TC_LOG_MMAP TC_LOG_DUMMY +#endif + +extern TC_LOG *tc_log; +extern TC_LOG_MMAP tc_log_mmap; +extern TC_LOG_DUMMY tc_log_dummy; + +/* log info errors */ +#define LOG_INFO_EOF -1 +#define LOG_INFO_IO -2 +#define LOG_INFO_INVALID -3 +#define LOG_INFO_SEEK -4 +#define LOG_INFO_MEM -6 +#define LOG_INFO_FATAL -7 +#define LOG_INFO_IN_USE -8 + +/* bitmap to SQL_LOG::close() */ +#define LOG_CLOSE_INDEX 1 +#define LOG_CLOSE_TO_BE_OPENED 2 +#define LOG_CLOSE_STOP_EVENT 4 + +struct st_relay_log_info; + +typedef struct st_log_info +{ + char log_file_name[FN_REFLEN]; + my_off_t index_file_offset, index_file_start_offset; + my_off_t pos; + bool fatal; // if the purge happens to give us a negative offset + pthread_mutex_t lock; + st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);} + ~st_log_info() { pthread_mutex_destroy(&lock);} +} LOG_INFO; + +/* + Currently we have only 3 kinds of logging functions: old-fashioned + logs, stdout and csv logging routines. 
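/*
  Editorial aside: the TC_LOG contract above is compact, so a sketch of a
  third implementation helps pin down its semantics.  This hypothetical
  tracing log is not part of the patch; it only illustrates that open()
  and close() bracket the log's lifetime, that log() is the prepare-phase
  hook whose non-zero return acts as the cookie (0 meaning error, as in
  TC_LOG_DUMMY), and that unlog() releases the cookie after commit:
*/
#include <stdio.h>

class TC_LOG_TRACE: public TC_LOG
{
  FILE *trace;
public:
  TC_LOG_TRACE() : trace(0) {}
  int open(const char *opt_name)
  { return (trace= fopen(opt_name, "a")) == 0; }
  int log(THD *thd, my_xid xid)
  {
    fprintf(trace, "prepare xid=%llu\n", (unsigned long long) xid);
    fflush(trace);               /* must reach disk before engines commit */
    return 1;                    /* non-zero cookie == success */
  }
  void unlog(ulong cookie, my_xid xid)
  { fprintf(trace, "commit xid=%llu\n", (unsigned long long) xid); }
  void close() { if (trace) fclose(trace); }
};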
+*/ +#define MAX_LOG_HANDLERS_NUM 3 + +/* log event handler flags */ +#define LOG_NONE 1 +#define LOG_FILE 2 +#define LOG_TABLE 4 + +class Log_event; +class Rows_log_event; + +enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN}; + +/* + TODO split MYSQL_LOG into base MYSQL_LOG and + MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG + most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG + only (TC_LOG included) + + TODO use mmap instead of IO_CACHE for binlog + (mmap+fsync is two times faster than write+fsync) +*/ + +class MYSQL_LOG: public TC_LOG +{ + private: + /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ + pthread_mutex_t LOCK_log, LOCK_index; + pthread_mutex_t LOCK_prep_xids; + pthread_cond_t COND_prep_xids; + pthread_cond_t update_cond; + ulonglong bytes_written; + time_t last_time,query_start; + IO_CACHE log_file; + IO_CACHE index_file; + char *name; + char time_buff[20],db[NAME_LEN+1]; + char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN]; + /* + The max size before rotation (usable only if log_type == LOG_BIN: binary + logs and relay logs). + For a binlog, max_size should be max_binlog_size. + For a relay log, it should be max_relay_log_size if this is non-zero, + max_binlog_size otherwise. + max_size is set in init(), and dynamically changed (when one does SET + GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and + fix_max_relay_log_size). + */ + ulong max_size; + ulong prepared_xids; /* for tc log - number of xids to remember */ + volatile enum_log_type log_type; + enum cache_type io_cache_type; + // current file sequence number for load data infile binary logging + uint file_id; + uint open_count; // For replication + int readers_count; + bool write_error, inited; + bool need_start_event; + /* + no_auto_events means we don't want any of these automatic events : + Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't + want a Rotate_log event to be written to the relay log. When we start a + relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs. + In 5.0 it's 0 for relay logs too! + */ + bool no_auto_events; + friend class Log_event; + +public: + ulonglong m_table_map_version; + + /* + These describe the log's format. This is used only for relay logs. + _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's + necessary to have 2 distinct objects, because the I/O thread may be reading + events in a different format from what the SQL thread is reading (consider + the case of a master which has been upgraded from 5.0 to 5.1 without doing + RESET MASTER, or from 4.x to 5.0). + */ + Format_description_log_event *description_event_for_exec, + *description_event_for_queue; + + MYSQL_LOG(); + /* + note that there's no destructor ~MYSQL_LOG() ! 
+ The reason is that we don't want it to be automatically called + on exit() - but only during the correct shutdown process + */ + + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(IO_CACHE *log, Format_description_log_event *fdle); +#if !defined(MYSQL_CLIENT) + bool is_table_mapped(TABLE *table) const + { + return table->s->table_map_version == m_table_map_version; + } + + int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event); + +#endif /* !defined(MYSQL_CLIENT) */ + void reset_bytes_written() + { + bytes_written = 0; + } + void harvest_bytes_written(ulonglong* counter) + { +#ifndef DBUG_OFF + char buf1[22],buf2[22]; +#endif + DBUG_ENTER("harvest_bytes_written"); + (*counter)+=bytes_written; + DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), + llstr(bytes_written,buf2))); + bytes_written=0; + DBUG_VOID_RETURN; + } + void set_max_size(ulong max_size_arg); + void signal_update(); + void wait_for_update(THD* thd, bool master_or_slave); + void set_need_start_event() { need_start_event = 1; } + void init(enum_log_type log_type_arg, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, ulong max_size); + void init_pthread_objects(); + void cleanup(); + bool open(const char *log_name, + enum_log_type log_type, + const char *new_name, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, ulong max_size, + bool null_created); + const char *generate_name(const char *log_name, const char *suffix, + bool strip_ext, char *buff); + /* simplified open_xxx wrappers for the gigantic open above */ + bool open_query_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, ".log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_slow_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, "-slow.log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_index_file(const char *index_file_name_arg, + const char *log_name); + void new_file(bool need_lock); + /* log a command to the old-fashioned general log */ + bool write(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len); + + /* log a query to the old-fashioned slow query log */ + bool write(THD *thd, time_t current_time, time_t query_start_arg, + const char *user_host, uint user_host_len, + longlong query_time, longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len); + + bool write(Log_event* event_info); // binary log write + bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); + + bool write_table_map(THD *thd, IO_CACHE *cache, TABLE *table, bool is_trans); + + void start_union_events(THD *thd); + void stop_union_events(THD *thd); + bool is_query_in_union(THD *thd, query_id_t query_id_param); + + /* + v stands for vector + invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0) + */ + bool appendv(const char* buf,uint len,...); + bool append(Log_event* ev); + + int generate_new_name(char *new_name,const char *old_name); + void make_log_name(char* buf, const char* log_ident); + bool is_active(const char* log_file_name); + int update_log_index(LOG_INFO* linfo, bool need_update_threads); + void rotate_and_purge(uint flags); + bool flush_and_sync(); + int purge_logs(const char *to_log, bool included, + bool need_mutex, bool need_update_threads, + ulonglong 
*decrease_log_space); + int purge_logs_before_date(time_t purge_time); + int purge_first_log(struct st_relay_log_info* rli, bool included); + bool reset_logs(THD* thd); + void close(uint exiting); + + // iterating through the log index file + int find_log_pos(LOG_INFO* linfo, const char* log_name, + bool need_mutex); + int find_next_log(LOG_INFO* linfo, bool need_mutex); + int get_current_log(LOG_INFO* linfo); + uint next_file_id(); + inline bool is_open() { return log_type != LOG_CLOSED; } + inline char* get_index_fname() { return index_file_name;} + inline char* get_log_fname() { return log_file_name; } + inline char* get_name() { return name; } + inline pthread_mutex_t* get_log_lock() { return &LOCK_log; } + inline IO_CACHE* get_log_file() { return &log_file; } + + inline void lock_index() { pthread_mutex_lock(&LOCK_index);} + inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} + inline IO_CACHE *get_index_file() { return &index_file;} + inline uint32 get_open_count() { return open_count; } +}; + +class Log_event_handler +{ +public: + virtual bool init()= 0; + virtual void cleanup()= 0; + + virtual bool log_slow(THD *thd, time_t current_time, + time_t query_start_arg, const char *user_host, + uint user_host_len, longlong query_time, + longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len)= 0; + virtual bool log_error(enum loglevel level, const char *format, + va_list args)= 0; + virtual bool log_general(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs)= 0; + virtual ~Log_event_handler() {} +}; + + +class Log_to_csv_event_handler: public Log_event_handler +{ + /* + We create artificial THD for each of the logs. This is to avoid + locking issues: we don't want locks on the log tables reside in the + THD's of the query. The reason is the locking order and duration. 
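/*
  Editorial aside: Log_event_handler above is the extension point behind
  MAX_LOG_HANDLERS_NUM.  A hypothetical extra backend (general log to
  stderr; not in the patch) shows the full set of virtuals a handler must
  provide -- backends that do not serve a given log simply return FALSE:
*/
#include <stdio.h>

class Log_to_stderr_event_handler: public Log_event_handler
{
public:
  virtual bool init() { return FALSE; }
  virtual void cleanup() {}
  virtual bool log_slow(THD *thd, time_t current_time,
                        time_t query_start_arg, const char *user_host,
                        uint user_host_len, longlong query_time,
                        longlong lock_time, bool is_command,
                        const char *sql_text, uint sql_text_len)
  { return FALSE; }                       /* not handled by this backend */
  virtual bool log_error(enum loglevel level, const char *format,
                         va_list args)
  { return FALSE; }
  virtual bool log_general(time_t event_time, const char *user_host,
                           uint user_host_len, int thread_id,
                           const char *command_type, uint command_type_len,
                           const char *sql_text, uint sql_text_len,
                           CHARSET_INFO *client_cs)
  {
    fprintf(stderr, "%ld\t%d\t%.*s\t%.*s\n", (long) event_time, thread_id,
            (int) command_type_len, command_type,
            (int) sql_text_len, sql_text);
    return FALSE;                         /* FALSE == no error */
  }
};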
+ */ + THD *general_log_thd, *slow_log_thd; + friend class LOGGER; + TABLE_LIST general_log, slow_log; + +private: + bool open_log_table(uint log_type); + +public: + Log_to_csv_event_handler(); + ~Log_to_csv_event_handler(); + virtual bool init(); + virtual void cleanup(); + + virtual bool log_slow(THD *thd, time_t current_time, + time_t query_start_arg, const char *user_host, + uint user_host_len, longlong query_time, + longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len); + virtual bool log_error(enum loglevel level, const char *format, + va_list args); + virtual bool log_general(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs); + bool flush(THD *thd, TABLE_LIST *close_slow_Log, + TABLE_LIST* close_general_log); + void close_log_table(uint log_type, bool lock_in_use); + bool reopen_log_table(uint log_type); +}; + + +class Log_to_file_event_handler: public Log_event_handler +{ + MYSQL_LOG mysql_log, mysql_slow_log; + bool is_initialized; +public: + Log_to_file_event_handler(): is_initialized(FALSE) + {} + virtual bool init(); + virtual void cleanup(); + + virtual bool log_slow(THD *thd, time_t current_time, + time_t query_start_arg, const char *user_host, + uint user_host_len, longlong query_time, + longlong lock_time, bool is_command, + const char *sql_text, uint sql_text_len); + virtual bool log_error(enum loglevel level, const char *format, + va_list args); + virtual bool log_general(time_t event_time, const char *user_host, + uint user_host_len, int thread_id, + const char *command_type, uint command_type_len, + const char *sql_text, uint sql_text_len, + CHARSET_INFO *client_cs); + void flush(); + void init_pthread_objects(); +}; + + +/* Class which manages slow, general and error log event handlers */ +class LOGGER +{ + pthread_mutex_t LOCK_logger; + /* flag to check whether logger mutex is initialized */ + uint inited; + + /* available log handlers */ + Log_to_csv_event_handler *table_log_handler; + Log_to_file_event_handler *file_log_handler; + + /* NULL-terminated arrays of log handlers */ + Log_event_handler *error_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + Log_event_handler *slow_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + Log_event_handler *general_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; + +public: + + bool is_log_tables_initialized; + + LOGGER() : inited(0), table_log_handler(NULL), + file_log_handler(NULL), is_log_tables_initialized(FALSE) + {} + void lock() { (void) pthread_mutex_lock(&LOCK_logger); } + void unlock() { (void) pthread_mutex_unlock(&LOCK_logger); } + /* + We want to initialize all log mutexes as soon as possible, + but we cannot do it in constructor, as safe_mutex relies on + initialization, performed by MY_INIT(). This why this is done in + this function. + */ + void init_base(); + void init_log_tables(); + bool flush_logs(THD *thd); + THD *get_general_log_thd() + { + if (table_log_handler) + return (THD *) table_log_handler->general_log_thd; + else + return NULL; + } + THD *get_slow_log_thd() + { + if (table_log_handler) + return (THD *) table_log_handler->slow_log_thd; + else + return NULL; + } + /* Perform basic logger cleanup. this will leave e.g. error log open. */ + void cleanup_base(); + /* Free memory. 
Nothing could be logged after this function is called */ + void cleanup_end(); + bool error_log_print(enum loglevel level, const char *format, + va_list args); + bool slow_log_print(THD *thd, const char *query, uint query_length, + time_t query_start_arg); + bool general_log_print(THD *thd,enum enum_server_command command, + const char *format, va_list args); + + void close_log_table(uint log_type, bool lock_in_use); + bool reopen_log_table(uint log_type); + + /* we use this function to setup all enabled log event handlers */ + int set_handlers(uint error_log_printer, + uint slow_log_printer, + uint general_log_printer); + void init_error_log(uint error_log_printer); + void init_slow_log(uint slow_log_printer); + void init_general_log(uint general_log_printer); + }; + + +enum enum_binlog_format { + BINLOG_FORMAT_STMT= 0, // statement-based +#ifdef HAVE_ROW_BASED_REPLICATION + BINLOG_FORMAT_ROW= 1, // row_based + /* + statement-based except for cases where only row-based can work (UUID() + etc): + */ + BINLOG_FORMAT_MIXED= 2, +#endif +/* + This value is last, after the end of binlog_format_typelib: it has no + corresponding cell in this typelib. We use this value to be able to know if + the user has explicitely specified a binlog format at startup or not. +*/ + BINLOG_FORMAT_UNSPEC= 3 +}; +extern TYPELIB binlog_format_typelib; + +#endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 5ca7c00ee8f..e589f46e0e0 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -21,10 +21,13 @@ #pragma implementation // gcc: Class implementation #endif -#include "mysql_priv.h" +#include "mysql_priv.h" #include "slave.h" +#include "rpl_filter.h" #include <my_dir.h> #endif /* MYSQL_CLIENT */ +#include <base64.h> +#include <my_bitmap.h> #define log_cs &my_charset_latin1 @@ -75,6 +78,20 @@ static void clear_all_errors(THD *thd, struct st_relay_log_info *rli) inline int ignored_error_code(int err_code) { +#ifdef HAVE_NDB_BINLOG + /* + The following error codes are hard-coded and will always be ignored. + */ + switch (err_code) + { + case ER_DB_CREATE_EXISTS: + case ER_DB_DROP_EXISTS: + return 1; + default: + /* Nothing to do */ + break; + } +#endif return ((err_code == ER_SLAVE_IGNORED_TABLE) || (use_slave_mask && bitmap_is_set(&slave_error_mask, err_code))); } @@ -244,6 +261,7 @@ char *str_to_hex(char *to, const char *from, uint len) commands just before it prints a query. 
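/*
  Editorial aside: because the three handler arrays above are
  NULL-terminated, dispatch is a plain walk.  The real fan-out lives in
  log.cc and is not shown in this diff, so the loop below is an
  assumption about its shape, illustrated for the general log only:
*/
static bool fan_out_general(Log_event_handler **handler,
                            time_t event_time, const char *user_host,
                            uint user_host_len, int thread_id,
                            const char *command_type, uint command_type_len,
                            const char *sql_text, uint sql_text_len,
                            CHARSET_INFO *client_cs)
{
  bool error= FALSE;
  for (; *handler; handler++)             /* list is NULL-terminated */
    error= (*handler)->log_general(event_time, user_host, user_host_len,
                                   thread_id, command_type, command_type_len,
                                   sql_text, sql_text_len, client_cs) ||
           error;
  return error;
}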
*/ +#ifdef MYSQL_CLIENT static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, uint32 flags, const char* name, bool* need_comma) { @@ -255,6 +273,7 @@ static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, *need_comma= 1; } } +#endif /************************************************************************** Log_event methods (= the parent class of all events) @@ -283,6 +302,10 @@ const char* Log_event::get_type_str() case XID_EVENT: return "Xid"; case USER_VAR_EVENT: return "User var"; case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; + case TABLE_MAP_EVENT: return "Table_map"; + case WRITE_ROWS_EVENT: return "Write_rows"; + case UPDATE_ROWS_EVENT: return "Update_rows"; + case DELETE_ROWS_EVENT: return "Delete_rows"; case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query"; case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query"; default: return "Unknown"; /* impossible */ @@ -790,6 +813,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, DBUG_RETURN(NULL); // general sanity check - will fail on a partial read } + /* To check the integrity of the Log_event_type enumeration */ + DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] < ENUM_END_EVENT); + switch(buf[EVENT_TYPE_OFFSET]) { case QUERY_EVENT: ev = new Query_log_event(buf, event_len, description_event, QUERY_EVENT); @@ -841,6 +867,20 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, case FORMAT_DESCRIPTION_EVENT: ev = new Format_description_log_event(buf, event_len, description_event); break; +#if defined(HAVE_REPLICATION) && defined(HAVE_ROW_BASED_REPLICATION) + case WRITE_ROWS_EVENT: + ev = new Write_rows_log_event(buf, event_len, description_event); + break; + case UPDATE_ROWS_EVENT: + ev = new Update_rows_log_event(buf, event_len, description_event); + break; + case DELETE_ROWS_EVENT: + ev = new Delete_rows_log_event(buf, event_len, description_event); + break; + case TABLE_MAP_EVENT: + ev = new Table_map_log_event(buf, event_len, description_event); + break; +#endif case BEGIN_LOAD_QUERY_EVENT: ev = new Begin_load_query_log_event(buf, event_len, description_event); break; @@ -960,6 +1000,24 @@ void Log_event::print_header(FILE* file, PRINT_EVENT_INFO* print_event_info) } +void Log_event::print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info) +{ + uchar *ptr= (uchar*)temp_buf; + my_off_t size= uint4korr(ptr + EVENT_LEN_OFFSET); + + char *tmp_str= + (char *) my_malloc(base64_needed_encoded_length(size), MYF(MY_WME)); + if (!tmp_str) { + fprintf(stderr, "\nError: Out of memory. " + "Could not print correct binlog event.\n"); + return; + } + int res= base64_encode(ptr, size, tmp_str); + fprintf(file, "\nBINLOG '\n%s\n';\n", tmp_str); + my_free(tmp_str, MYF(0)); +} + + /* Log_event::print_timestamp() */ @@ -1609,7 +1667,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query */ thd->catalog= catalog_len ? (char *) catalog : (char *)""; thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); thd->variables.auto_increment_increment= auto_increment_increment; thd->variables.auto_increment_offset= auto_increment_offset; @@ -1638,7 +1696,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). 
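/*
  Editorial aside: the new Log_event::print_base64() above works for any
  event type because every event records its own length in the common
  header at EVENT_LEN_OFFSET.  The core steps, condensed from the patch
  (error reporting elided; base64_needed_encoded_length() and
  base64_encode() are the mysys helpers pulled in via <base64.h>):
*/
static void event_to_base64(FILE *file, const char *event_buf)
{
  uchar *ptr= (uchar*) event_buf;
  my_off_t size= uint4korr(ptr + EVENT_LEN_OFFSET);  /* self-describing */
  char *tmp= (char*) my_malloc(base64_needed_encoded_length(size),
                               MYF(MY_WME));
  if (tmp)
  {
    (void) base64_encode(ptr, size, tmp);       /* fills tmp, NUL-terminated */
    fprintf(file, "\nBINLOG '\n%s\n';\n", tmp); /* replayable by the server */
    my_free(tmp, MYF(0));
  }
}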
*/ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); thd->query_length= q_len_arg; @@ -1727,7 +1785,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query clear_all_errors(thd, rli); /* Can ignore query */ else { - slave_print_error(rli,expected_error, + slave_print_msg(ERROR_LEVEL, rli, expected_error, "\ Query partially completed on the master (error on master: %d) \ and was aborted. There is a chance that your master is inconsistent at this \ @@ -1741,7 +1799,7 @@ START SLAVE; . Query: '%s'", expected_error, thd->query); /* If the query was not ignored, it is printed to the general log */ if (thd->net.last_errno != ER_SLAVE_IGNORED_TABLE) - mysql_log.write(thd,COM_QUERY,"%s",thd->query); + general_log_print(thd, COM_QUERY, "%s", thd->query); compare_errors: @@ -1756,16 +1814,16 @@ compare_errors: !ignored_error_code(actual_error) && !ignored_error_code(expected_error)) { - slave_print_error(rli, 0, - "\ -Query caused different errors on master and slave. \ + slave_print_msg(ERROR_LEVEL, rli, 0, + "\ +Query caused different errors on master and slave. \ Error on master: '%s' (%d), Error on slave: '%s' (%d). \ Default database: '%s'. Query: '%s'", - ER_SAFE(expected_error), - expected_error, - actual_error ? thd->net.last_error: "no error", - actual_error, - print_slave_db_safe(db), query_arg); + ER_SAFE(expected_error), + expected_error, + actual_error ? thd->net.last_error: "no error", + actual_error, + print_slave_db_safe(db), query_arg); thd->query_error= 1; } /* @@ -1782,11 +1840,11 @@ Default database: '%s'. Query: '%s'", */ else if (thd->query_error || thd->is_fatal_error) { - slave_print_error(rli,actual_error, - "Error '%s' on query. Default database: '%s'. Query: '%s'", - (actual_error ? thd->net.last_error : - "unexpected success or fatal error"), - print_slave_db_safe(thd->db), query_arg); + slave_print_msg(ERROR_LEVEL, rli, actual_error, + "Error '%s' on query. Default database: '%s'. Query: '%s'", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + print_slave_db_safe(thd->db), query_arg); thd->query_error= 1; } @@ -2068,6 +2126,25 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver) post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; + post_header_len[TABLE_MAP_EVENT-1]= TABLE_MAP_HEADER_LEN; + post_header_len[WRITE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + post_header_len[UPDATE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + post_header_len[DELETE_ROWS_EVENT-1]= ROWS_HEADER_LEN; + /* + We here have the possibility to simulate a master of before we changed + the table map id to be stored in 6 bytes: when it was stored in 4 + bytes (=> post_header_len was 6). This is used to test backward + compatibility. + This code can be removed after a few months (today is Dec 21st 2005), + when we know that the 4-byte masters are not deployed anymore (check + with Tomas Ulin first!), and the accompanying test (rpl_row_4_bytes) + too. 
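/*
  Editorial aside: the DBUG hook above shrinks the row/table-map
  post-header to its pre-5.1.4 layout, where the table map id took 4
  bytes instead of 6.  Readers key off post_header_len, exactly as the
  Rows_log_event and Table_map_log_event constructors later in this file
  do; the check in isolation:
*/
static ulong read_table_map_id(const char *post_start, uint8 post_header_len)
{
  if (post_header_len == 6)
    return (ulong) uint4korr(post_start);    /* old 4-byte master */
  return (ulong) uint6korr(post_start);      /* current 6-byte format */
}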
+ */ + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + post_header_len[TABLE_MAP_EVENT-1]= + post_header_len[WRITE_ROWS_EVENT-1]= + post_header_len[UPDATE_ROWS_EVENT-1]= + post_header_len[DELETE_ROWS_EVENT-1]= 6;); post_header_len[BEGIN_LOAD_QUERY_EVENT-1]= post_header_len[APPEND_BLOCK_EVENT-1]; post_header_len[EXECUTE_LOAD_QUERY_EVENT-1]= EXECUTE_LOAD_QUERY_HEADER_LEN; } @@ -2202,10 +2279,8 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) As a transaction NEVER spans on 2 or more binlogs: if we have an active transaction at this point, the master died while writing the transaction to the binary log, i.e. while - flushing the binlog cache to the binlog. As the write was started, - the transaction had been committed on the master, so we lack of - information to replay this transaction on the slave; all we can do - is stop with error. + flushing the binlog cache to the binlog. XA guarantees that master has + rolled back. So we roll back. Note: this event could be sent by the master to inform us of the format of its binlog; in other words maybe it is not at its original place when it comes to us; we'll know this by checking @@ -2213,11 +2288,13 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) */ if (!artificial_event && created && thd->transaction.all.nht) { - slave_print_error(rli, 0, "Rolling back unfinished transaction (no " - "COMMIT or ROLLBACK) from relay log. A probable cause " - "is that the master died while writing the transaction " - "to its binary log."); - end_trans(thd, ROLLBACK); + /* This is not an error (XA is safe), just an information */ + slave_print_msg(INFORMATION_LEVEL, rli, 0, + "Rolling back unfinished transaction (no COMMIT " + "or ROLLBACK in relay log). A probable cause is that " + "the master died while writing the transaction to " + "its binary log, thus rolled back too."); + rli->cleanup_context(thd, 1); } #endif /* @@ -2759,11 +2836,14 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, bool use_rli_only_for_errors) { thd->db_length= db_len; - thd->db= (char*) rewrite_db(db, &thd->db_length); + thd->db= (char *) rpl_filter->get_rewrite_db(db, &thd->db_length); DBUG_ASSERT(thd->query == 0); thd->query_length= 0; // Should not be needed thd->query_error= 0; clear_all_errors(thd, rli); + + /* see Query_log_event::exec_event() and BUG#13360 */ + DBUG_ASSERT(!rli->m_table_map.count()); /* Usually mysql_init_query() is called by mysql_parse(), but we need it here as the present method does not call mysql_parse(). @@ -2798,7 +2878,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ::exec_event(), then the companion SET also have so we don't need to reset_one_shot_variables(). */ - if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) + if (rpl_filter->db_ok(thd->db)) { thd->set_time((time_t)when); VOID(pthread_mutex_lock(&LOCK_thread_count)); @@ -2820,7 +2900,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, tables.updating= 1; // the table will be opened in mysql_load - if (table_rules_on && !tables_ok(thd, &tables)) + if (rpl_filter->is_on() && !rpl_filter->tables_ok(thd->db, &tables)) { // TODO: this is a bug - this needs to be moved to the I/O thread if (net) @@ -2975,9 +3055,9 @@ error: sql_errno=ER_UNKNOWN_ERROR; err=ER(sql_errno); } - slave_print_error(rli,sql_errno,"\ + slave_print_msg(ERROR_LEVEL, rli, sql_errno,"\ Error '%s' running LOAD DATA INFILE on table '%s'. 
Default database: '%s'", - err, (char*)table_name, print_slave_db_safe(save_db)); + err, (char*)table_name, print_slave_db_safe(save_db)); free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); return 1; } @@ -2985,9 +3065,9 @@ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'", if (thd->is_fatal_error) { - slave_print_error(rli,ER_UNKNOWN_ERROR, "\ + slave_print_msg(ERROR_LEVEL, rli, ER_UNKNOWN_ERROR, "\ Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'", - (char*)table_name, print_slave_db_safe(save_db)); + (char*)table_name, print_slave_db_safe(save_db)); return 1; } @@ -3048,8 +3128,7 @@ void Rotate_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) #ifndef MYSQL_CLIENT -Rotate_log_event::Rotate_log_event(THD* thd_arg, - const char* new_log_ident_arg, +Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg, uint ident_len_arg, ulonglong pos_arg, uint flags_arg) :Log_event(), new_log_ident(new_log_ident_arg), @@ -3058,12 +3137,12 @@ Rotate_log_event::Rotate_log_event(THD* thd_arg, { #ifndef DBUG_OFF char buff[22]; - DBUG_ENTER("Rotate_log_event::Rotate_log_event(THD*,...)"); + DBUG_ENTER("Rotate_log_event::Rotate_log_event(...,flags)"); DBUG_PRINT("enter",("new_log_ident %s pos %s flags %lu", new_log_ident_arg, llstr(pos_arg, buff), flags)); #endif if (flags & DUP_NAME) - new_log_ident= my_strdup_with_length((const byte*) new_log_ident_arg, + new_log_ident= my_strndup((const byte*) new_log_ident_arg, ident_len, MYF(MY_WME)); DBUG_VOID_RETURN; } @@ -3087,7 +3166,7 @@ Rotate_log_event::Rotate_log_event(const char* buf, uint event_len, (header_size+post_header_len)); ident_offset = post_header_len; set_if_smaller(ident_len,FN_REFLEN-1); - new_log_ident= my_strdup_with_length((byte*) buf + ident_offset, + new_log_ident= my_strndup((byte*) buf + ident_offset, (uint) ident_len, MYF(MY_WME)); DBUG_VOID_RETURN; @@ -3370,12 +3449,24 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) Xid_log_event methods **************************************************************************/ +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) +/* + This static class member could be removed when mysqltest is made to support + a --replace-regex command: then tests which have XIDs in their output can + use this command to suppress non-deterministic XID values. 
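/*
  Editorial aside: with show_xid defaulting to FALSE in debug builds,
  pack_info() can omit the XID value so test output stays deterministic.
  The logic of the next hunk, reduced to a helper (buf sized like the
  original's 128-byte buffer; in release builds the XID is always shown):
*/
static void pack_xid_info(char *buf, my_xid xid, bool show_xid)
{
  char *pos= strmov(buf, "COMMIT /* xid=");
  if (show_xid)                   /* suppressed under mysqltest */
    pos= longlong10_to_str(xid, pos, 10);
  strmov(pos, " */");
}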
+*/ +my_bool Xid_log_event::show_xid; +#endif + #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) void Xid_log_event::pack_info(Protocol *protocol) { char buf[128], *pos; pos= strmov(buf, "COMMIT /* xid="); - pos= longlong10_to_str(xid, pos, 10); +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + if (show_xid) +#endif + pos= longlong10_to_str(xid, pos, 10); pos= strmov(pos, " */"); protocol->store(buf, (uint) (pos-buf), &my_charset_bin); } @@ -3430,7 +3521,8 @@ void Xid_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info) int Xid_log_event::exec_event(struct st_relay_log_info* rli) { /* For a slave Xid_log_event is COMMIT */ - mysql_log.write(thd,COM_QUERY,"COMMIT /* implicit, from Xid_log_event */"); + general_log_print(thd, COM_QUERY, + "COMMIT /* implicit, from Xid_log_event */"); return end_trans(thd, COMMIT) || Log_event::exec_event(rli); } #endif /* !MYSQL_CLIENT */ @@ -4195,9 +4287,8 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) init_io_cache(&file, fd, IO_SIZE, WRITE_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Create_file event: could not open file '%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "could not open file '%s'", fname_buf); goto err; } @@ -4207,10 +4298,9 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) if (write_base(&file)) { strmov(ext, ".info"); // to have it right in the error message - slave_print_error(rli,my_errno, - "Error in Create_file event: could not write to file " - "'%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in Create_file event: could not write to file '%s'", + fname_buf); goto err; } end_io_cache(&file); @@ -4222,16 +4312,14 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli,my_errno, - "Error in Create_file event: could not open file '%s'", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "could not open file '%s'", fname_buf); goto err; } if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Create_file event: write to '%s' failed", - fname_buf); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: " + "write to '%s' failed", fname_buf); goto err; } error=0; // Everything is ok @@ -4369,25 +4457,25 @@ int Append_block_log_event::exec_event(struct st_relay_log_info* rli) O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli, my_errno, - "Error in %s event: could not create file '%s'", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: could not create file '%s'", + get_type_str(), fname); goto err; } } else if ((fd = my_open(fname, O_WRONLY | O_APPEND | O_BINARY | O_NOFOLLOW, MYF(MY_WME))) < 0) { - slave_print_error(rli, my_errno, - "Error in %s event: could not open file '%s'", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: could not open file '%s'", + get_type_str(), fname); goto err; } if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP))) { - slave_print_error(rli, my_errno, - "Error in %s event: write to '%s' failed", - get_type_str(), fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, + "Error in %s event: write to '%s' failed", + get_type_str(), fname); goto err; } error=0; @@ 
-4593,9 +4681,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) init_io_cache(&file, fd, IO_SIZE, READ_CACHE, (my_off_t)0, 0, MYF(MY_WME|MY_NABP))) { - slave_print_error(rli,my_errno, - "Error in Exec_load event: could not open file '%s'", - fname); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Exec_load event: " + "could not open file '%s'", fname); goto err; } if (!(lev = (Load_log_event*)Log_event::read_log_event(&file, @@ -4603,9 +4690,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) rli->relay_log.description_event_for_exec)) || lev->get_type_code() != NEW_LOAD_EVENT) { - slave_print_error(rli,0, - "Error in Exec_load event: file '%s' appears corrupted", - fname); + slave_print_msg(ERROR_LEVEL, rli, 0, "Error in Exec_load event: " + "file '%s' appears corrupted", fname); goto err; } @@ -4631,10 +4717,10 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME)); if (tmp) { - slave_print_error(rli, - rli->last_slave_errno, /* ok to re-use error code */ - "%s. Failed executing load from '%s'", - tmp, fname); + slave_print_msg(ERROR_LEVEL, rli, + rli->last_slave_errno, /* ok to re-use error code */ + "%s. Failed executing load from '%s'", + tmp, fname); my_free(tmp,MYF(0)); } goto err; @@ -4840,7 +4926,7 @@ Execute_load_query_log_event::exec_event(struct st_relay_log_info* rli) if (!(buf = my_malloc(q_len + 1 - (fn_pos_end - fn_pos_start) + (FN_REFLEN + 10) + 10 + 8 + 5, MYF(MY_WME)))) { - slave_print_error(rli, my_errno, "Not enough memory"); + slave_print_msg(ERROR_LEVEL, rli, my_errno, "Not enough memory"); return 1; } @@ -4965,3 +5051,1762 @@ char* sql_ex_info::init(char* buf,char* buf_end,bool use_new_format) } return buf; } + + +#ifdef HAVE_ROW_BASED_REPLICATION + +/************************************************************************** + Rows_log_event member functions +**************************************************************************/ + +#ifndef MYSQL_CLIENT +Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid, + MY_BITMAP const *cols, bool is_transactional) + : Log_event(thd_arg, 0, is_transactional), + m_table(tbl_arg), + m_table_id(tid), + m_width(tbl_arg->s->fields), + m_rows_buf((byte*)my_malloc(opt_binlog_rows_event_max_size * + sizeof(*m_rows_buf), MYF(MY_WME))), + m_rows_cur(m_rows_buf), + m_rows_end(m_rows_buf + opt_binlog_rows_event_max_size), + m_flags(0) +{ + DBUG_ASSERT(m_table && m_table->s); + DBUG_ASSERT(m_table_id != ULONG_MAX); + + if (thd_arg->options & OPTION_NO_FOREIGN_KEY_CHECKS) + set_flags(NO_FOREIGN_KEY_CHECKS_F); + if (thd_arg->options & OPTION_RELAXED_UNIQUE_CHECKS) + set_flags(RELAXED_UNIQUE_CHECKS_F); + /* if bitmap_init fails, catched in is_valid() */ + if (likely(!bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? 
m_bitbuf : NULL, + (m_width + 7) & ~7UL, + false))) + memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols)); + else + m_cols.bitmap= 0; // to not free it +} +#endif + +Rows_log_event::Rows_log_event(const char *buf, uint event_len, + Log_event_type event_type, + const Format_description_log_event + *description_event) + : Log_event(buf, description_event), + m_rows_buf(0), m_rows_cur(0), m_rows_end(0) +{ + DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)"); + uint8 const common_header_len= description_event->common_header_len; + uint8 const post_header_len= description_event->post_header_len[event_type-1]; + + DBUG_PRINT("enter",("event_len=%ld, common_header_len=%d, " + "post_header_len=%d", + event_len, common_header_len, + post_header_len)); + + const char *post_start= buf + common_header_len; + post_start+= RW_MAPID_OFFSET; + if (post_header_len == 6) + { + /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */ + m_table_id= uint4korr(post_start); + post_start+= 4; + } + else + { + m_table_id= uint6korr(post_start); + post_start+= RW_FLAGS_OFFSET; + } + + DBUG_ASSERT(m_table_id != ULONG_MAX); + + m_flags= uint2korr(post_start); + + byte const *const var_start= (const byte *)buf + common_header_len + + post_header_len; + byte const *const ptr_width= var_start; + uchar *ptr_after_width= (uchar*) ptr_width; + m_width = net_field_length(&ptr_after_width); + + const uint byte_count= (m_width + 7) / 8; + const byte* const ptr_rows_data= var_start + byte_count + 1; + + my_size_t const data_size= event_len - (ptr_rows_data - (const byte *) buf); + DBUG_PRINT("info",("m_table_id=%lu, m_flags=%d, m_width=%u, data_size=%lu", + m_table_id, m_flags, m_width, data_size)); + + m_rows_buf= (byte*)my_malloc(data_size, MYF(MY_WME)); + if (likely((bool)m_rows_buf)) + { + /* if bitmap_init fails, catched in is_valid() */ + if (likely(!bitmap_init(&m_cols, + m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL, + (m_width + 7) & ~7UL, + false))) + memcpy(m_cols.bitmap, ptr_after_width, byte_count); + m_rows_end= m_rows_buf + data_size; + m_rows_cur= m_rows_end; + memcpy(m_rows_buf, ptr_rows_data, data_size); + } + else + m_cols.bitmap= 0; // to not free it + + DBUG_VOID_RETURN; +} + +Rows_log_event::~Rows_log_event() +{ + if (m_cols.bitmap == m_bitbuf) // no my_malloc happened + m_cols.bitmap= 0; // so no my_free in bitmap_free + bitmap_free(&m_cols); // To pair with bitmap_init(). + my_free((gptr)m_rows_buf, MYF(MY_ALLOW_ZERO_PTR)); +} + +#ifndef MYSQL_CLIENT +int Rows_log_event::do_add_row_data(byte *const row_data, + my_size_t const length) +{ + /* + When the table has a primary key, we would probably want, by default, to + log only the primary key value instead of the entire "before image". This + would save binlog space. 
TODO + */ + DBUG_ENTER("Rows_log_event::do_add_row_data(byte *data, my_size_t length)"); + DBUG_PRINT("enter", ("row_data= %p, length= %lu", row_data, length)); + DBUG_DUMP("row_data", (const char*)row_data, min(length, 32)); + + DBUG_ASSERT(m_rows_buf <= m_rows_cur); + DBUG_ASSERT(m_rows_buf < m_rows_end); + DBUG_ASSERT(m_rows_cur <= m_rows_end); + + /* The cast will always work since m_rows_cur <= m_rows_end */ + if (static_cast<my_size_t>(m_rows_end - m_rows_cur) < length) + { + my_size_t const block_size= 1024; + my_ptrdiff_t const old_alloc= m_rows_end - m_rows_buf; + my_ptrdiff_t const new_alloc= + old_alloc + block_size * (length / block_size + block_size - 1); + my_ptrdiff_t const cur_size= m_rows_cur - m_rows_buf; + + byte* const new_buf= + (byte*)my_realloc((gptr)m_rows_buf, new_alloc, MYF(MY_WME)); + if (unlikely(!new_buf)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + /* If the memory moved, we need to move the pointers */ + if (new_buf != m_rows_buf) + { + m_rows_buf= new_buf; + m_rows_cur= m_rows_buf + cur_size; + } + + /* + The end pointer should always be changed to point to the end of + the allocated memory. + */ + m_rows_end= m_rows_buf + new_alloc; + } + + DBUG_ASSERT(m_rows_cur + length < m_rows_end); + memcpy(m_rows_cur, row_data, length); + m_rows_cur+= length; + DBUG_RETURN(0); +} +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +/* + Unpack a row into a record. The row is assumed to only consist of the fields + for which the bitset represented by 'arr' and 'bits'; the other parts of the + record are left alone. + */ +static char const *unpack_row(TABLE *table, + byte *record, char const *row, + MY_BITMAP const *cols) +{ + DBUG_ASSERT(record && row); + + MY_BITMAP *write_set= table->file->write_set; + my_size_t const n_null_bytes= table->s->null_bytes; + my_ptrdiff_t const offset= record - (byte*) table->record[0]; + + memcpy(record, row, n_null_bytes); + char const *ptr= row + n_null_bytes; + + bitmap_set_all(write_set); + Field **const begin_ptr = table->field; + for (Field **field_ptr= begin_ptr ; *field_ptr ; ++field_ptr) + { + Field *const f= *field_ptr; + + if (bitmap_is_set(cols, field_ptr - begin_ptr)) + { + /* Field...::unpack() cannot return 0 */ + ptr= f->unpack(f->ptr + offset, ptr); + } + else + bitmap_clear_bit(write_set, (field_ptr - begin_ptr) + 1); + } + return ptr; +} + +int Rows_log_event::exec_event(st_relay_log_info *rli) +{ + DBUG_ENTER("Rows_log_event::exec_event(st_relay_log_info*)"); + DBUG_ASSERT(m_table_id != ULONG_MAX); + int error= 0; + char const *row_start= (char const *)m_rows_buf; + TABLE* table= rli->m_table_map.get_table(m_table_id); + + /* + 'thd' has been set by exec_relay_log_event(), just before calling + exec_event(). We still check here to prevent future coding errors. + */ + DBUG_ASSERT(rli->sql_thd == thd); + + /* + lock_tables() reads the contents of thd->lex, so they must be + initialized, so we should call lex_start(); to be even safer, we call + mysql_init_query() which does a more complete set of inits. + */ + mysql_init_query(thd, NULL, 0); + + if (table) + { + /* + table == NULL means that this table should not be + replicated (this was set up by Table_map_log_event::exec_event() which + tested replicate-* rules). 
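/*
  Editorial aside: the writer and unpack_row() above share one encoding
  for the column bitmap -- bit i set means field i is present in the row
  image, with bits packed LSB-first per byte as in my_bitmap.  The
  arithmetic, stated in standard C:
*/
#include <stdint.h>

static unsigned col_bitmap_bytes(unsigned n_cols)
{ return (n_cols + 7) / 8; }      /* same rounding as (m_width + 7) / 8 */

static int col_is_set(const uint8_t *bitmap, unsigned i)
{ return (bitmap[i / 8] >> (i % 8)) & 1; }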
+ */ + TABLE_LIST table_list; + TABLE_LIST *tables= &table_list; + bool need_reopen; + uint count= 1; + bzero(&table_list, sizeof(table_list)); + table_list.lock_type= TL_WRITE; + table_list.next_global= table_list.next_local= 0; + table_list.table= table; + + for ( ; ; ) + { + table_list.db= const_cast<char*>(table->s->db.str); + table_list.alias= table_list.table_name= + const_cast<char*>(table->s->table_name.str); + + if ((error= lock_tables(thd, &table_list, count, &need_reopen)) == 0) + break; + if (!need_reopen) + { + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: error during table %s.%s lock", + get_type_str(), table->s->db.str, + table->s->table_name.str); + DBUG_RETURN(error); + } + /* + we need to store a local copy of the table names since the table object + will become invalid after close_tables_for_reopen + */ + char *db= my_strdup(table->s->db.str, MYF(MY_WME)); + char *table_name= my_strdup(table->s->table_name.str, MYF(MY_WME)); + + if (db == 0 || table_name == 0) + { + /* + Since the lock_tables() failed, the table is not locked, so + we don't need to unlock them. + */ + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + + /* + We also needs to flush the pending RBR event, since it keeps a + pointer to an open table. + + ALTERNATIVE SOLUTION: Extract a pointer to the pending RBR + event and reset the table pointer after the tables has been + reopened. + */ + thd->binlog_flush_pending_rows_event(false); + + close_tables_for_reopen(thd, &tables); + + /* open the table again, same as in Table_map_event::exec_event */ + table_list.db= const_cast<char*>(db); + table_list.alias= table_list.table_name= const_cast<char*>(table_name); + table_list.updating= 1; + if ((error= open_tables(thd, &tables, &count, 0)) == 0) + { + /* reset some variables for the table list*/ + table_list.updating= 0; + /* retrieve the new table reference and update the table map */ + table= table_list.table; + error= rli->m_table_map.set_table(m_table_id, table); + } + else /* error in open_tables */ + { + if (thd->query_error || thd->is_fatal_error) + { + /* + Error reporting borrowed from Query_log_event with many excessive + simplifications (we don't honour --slave-skip-errors) + */ + uint actual_error= thd->net.last_errno; + slave_print_msg(ERROR_LEVEL, rli, actual_error, + "Error '%s' on reopening table `%s`.`%s`", + (actual_error ? thd->net.last_error : + "unexpected success or fatal error"), + db, table_name); + thd->query_error= 1; + } + } + my_free((char*) db, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) table_name, MYF(MY_ALLOW_ZERO_PTR)); + + if (error) + DBUG_RETURN(error); + } + + /* + It's not needed to set_time() but + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. Presently it changes nothing. + */ + thd->set_time((time_t)when); + /* + There are a few flags that are replicated with each row event. + Make sure to set/clear them before executing the main body of + the event. 
+ */ + if (get_flags(NO_FOREIGN_KEY_CHECKS_F)) + thd->options|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (get_flags(RELAXED_UNIQUE_CHECKS_F)) + thd->options|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS; + /* A small test to verify that objects have consistent types */ + DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); + + error= do_before_row_operations(table); + while (error == 0 && row_start < (const char*)m_rows_end) { + char const *row_end= do_prepare_row(thd, table, row_start); + DBUG_ASSERT(row_end != NULL); // cannot happen + DBUG_ASSERT(row_end <= (const char*)m_rows_end); + + /* in_use can have been set to NULL in close_tables_for_reopen */ + THD* old_thd= table->in_use; + if (!table->in_use) + table->in_use= thd; + error= do_exec_row(table); + table->in_use = old_thd; + switch (error) + { + /* Some recoverable errors */ + case HA_ERR_RECORD_CHANGED: + case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if + tuple does not exist */ + error= 0; + case 0: + break; + + default: + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: row application failed", + get_type_str()); + thd->query_error= 1; + break; + } + + row_start= row_end; + } + DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event", + rli->abort_slave=1;); + error= do_after_row_operations(table, error); + if (!cache_stmt) + thd->options|= OPTION_STATUS_NO_TRANS_UPDATE; + + } + + if (error) + { /* error has occured during the transaction */ + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: error during transaction execution " + "on table %s.%s", + get_type_str(), table->s->db.str, + table->s->table_name.str); + /* + If one day we honour --skip-slave-errors in row-based replication, and + the error should be skipped, then we would clear mappings, rollback, + close tables, but the slave SQL thread would not stop and then may + assume the mapping is still available, the tables are still open... + So then we should clear mappings/rollback/close here only if this is a + STMT_END_F. + For now we code, knowing that error is not skippable and so slave SQL + thread is certainly going to stop. + */ + rli->cleanup_context(thd, 1); + thd->query_error= 1; + DBUG_RETURN(error); + } + + if (get_flags(STMT_END_F)) + { + /* + This is the end of a statement or transaction, so close (and + unlock) the tables we opened when processing the + Table_map_log_event starting the statement. + + OBSERVER. This will clear *all* mappings, not only those that + are open for the table. There is not good handle for on-close + actions for tables. + + NOTE. Even if we have no table ('table' == 0) we still need to be + here, so that we increase the group relay log position. If we didn't, we + could have a group relay log position which lags behind "forever" + (assume the last master's transaction is ignored by the slave because of + replicate-ignore rules). + */ + thd->binlog_flush_pending_rows_event(true); + /* + If this event is not in a transaction, the call below will, if some + transactional storage engines are involved, commit the statement into + them and flush the pending event to binlog. + If this event is in a transaction, the call will do nothing, but a + Xid_log_event will come next which will, if some transactional engines + are involved, commit the transaction and flush the pending event to the + binlog. 
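/*
  Editorial aside: the switch in the row loop above encodes the slave's
  idempotency rule -- a few handler errors are downgraded to success
  because re-applying a row event must be harmless.  As a predicate:
*/
static int row_error_is_recoverable(int error)
{
  switch (error)
  {
  case HA_ERR_RECORD_CHANGED:     /* before-image already superseded */
  case HA_ERR_KEY_NOT_FOUND:      /* row to delete/update already gone */
    return 1;                     /* treat the row as applied */
  default:
    return 0;
  }
}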
+ */ + error= ha_autocommit_or_rollback(thd, 0); + /* + Now what if this is not a transactional engine? we still need to + flush the pending event to the binlog; we did it with + thd->binlog_flush_pending_rows_event(). Note that we imitate + what is done for real queries: a call to + ha_autocommit_or_rollback() (sometimes only if involves a + transactional engine), and a call to be sure to have the pending + event flushed. + */ + + rli->cleanup_context(thd, 0); + rli->transaction_end(thd); + + if (error == 0) + { + /* + Clear any errors pushed in thd->net.last_err* if for example "no key + found" (as this is allowed). This is a safety measure; apparently + those errors (e.g. when executing a Delete_rows_log_event of a + non-existing row, like in rpl_row_mystery22.test, + thd->net.last_error = "Can't find record in 't1'" and last_errno=1032) + do not become visible. We still prefer to wipe them out. + */ + thd->clear_error(); + error= Log_event::exec_event(rli); + } + else + slave_print_msg(ERROR_LEVEL, rli, error, + "Error in %s event: commit of row events failed, " + "table `%s`.`%s`", + get_type_str(), table->s->db.str, + table->s->table_name.str); + DBUG_RETURN(error); + } + + if (table) + { + /* + As "table" is not NULL, we did a successful lock_tables(), without any + prior LOCK TABLES and are not in prelocked mode, so this assertion should + be true. + */ + DBUG_ASSERT(thd->lock); + /* + If we are here, there are more events to come which may use our mappings + and our table. So don't clear mappings or close tables, just unlock + tables. + Why don't we lock the table once for all in + Table_map_log_event::exec_event() ? Because we could have in binlog: + BEGIN; + Table_map t1 -> 1 + Write_rows to id 1 + Table_map t2 -> 2 + Write_rows to id 2 + Xid_log_event + So we cannot lock t1 when executing the first Table_map, because at that + moment we don't know we'll also have to lock t2, and all tables must be + locked at once in MySQL. + */ + mysql_unlock_tables(thd, thd->lock); + thd->lock= 0; + if ((table->s->primary_key == MAX_KEY) && + !cache_stmt) + { + /* + ------------ Temporary fix until WL#2975 is implemented --------- + This event is not the last one (no STMT_END_F). If we stop now (in + case of terminate_slave_thread()), how will we restart? We have to + restart from Table_map_log_event, but as this table is not + transactional, the rows already inserted will still be present, and + idempotency is not guaranteed (no PK) so we risk that repeating leads + to double insert. So we desperately try to continue, hope we'll + eventually leave this buggy situation (by executing the final + Rows_log_event). If we are in a hopeless wait (reached end of last + relay log and nothing gets appended there), we timeout after one + minute, and notify DBA about the problem. + When WL#2975 is implemented, just remove the member + st_relay_log_info::unsafe_to_stop_at and all its occurences. 
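/*
  Editorial aside: the checker the comment above implies lives on the
  slave-thread side and is not part of this diff.  A hedged sketch of the
  policy it describes -- zero means "no open non-transactional group",
  and the 60-second constant is the "one minute" mentioned above:
*/
#include <time.h>

static int safe_to_stop_slave(time_t unsafe_to_stop_at)
{
  if (unsafe_to_stop_at == 0)
    return 1;                       /* nothing risky in progress */
  if (difftime(time(NULL), unsafe_to_stop_at) >= 60.0)
    return 1;                       /* give up waiting; warn the DBA */
  return 0;                         /* keep applying events for now */
}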
+ */ + rli->unsafe_to_stop_at= time(0); + } + } + + DBUG_ASSERT(error == 0); + thd->clear_error(); + rli->inc_event_relay_log_pos(); + + DBUG_RETURN(0); +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifndef MYSQL_CLIENT +bool Rows_log_event::write_data_header(IO_CACHE *file) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + byte buf[ROWS_HEADER_LEN]; // No need to init the buffer + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (my_b_safe_write(file, buf, 6)); + }); + int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id); + int2store(buf + RW_FLAGS_OFFSET, m_flags); + return (my_b_safe_write(file, buf, ROWS_HEADER_LEN)); +} + +bool Rows_log_event::write_data_body(IO_CACHE*file) +{ + /* + Note that this should be the number of *bits*, not the number of + bytes. + */ + char sbuf[sizeof(m_width)]; + my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf; + + char *const sbuf_end= net_store_length((char*) sbuf, (uint) m_width); + DBUG_ASSERT(static_cast<my_size_t>(sbuf_end - sbuf) <= sizeof(sbuf)); + + return (my_b_safe_write(file, reinterpret_cast<byte*>(sbuf), + sbuf_end - sbuf) || + my_b_safe_write(file, reinterpret_cast<byte*>(m_cols.bitmap), + no_bytes_in_map(&m_cols)) || + my_b_safe_write(file, m_rows_buf, data_size)); +} +#endif + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) && defined(DBUG_RBR) +void Rows_log_event::pack_info(Protocol *protocol) +{ + char buf[256]; + char const *const flagstr= get_flags(STMT_END_F) ? "STMT_END_F" : ""; + char const *const dbnam= m_table->s->db.str; + char const *const tblnam= m_table->s->table_name.str; + my_size_t bytes= snprintf(buf, sizeof(buf), + "%s.%s - %s", dbnam, tblnam, flagstr); + protocol->store(buf, bytes, &my_charset_bin); +} +#endif + +/************************************************************************** + Table_map_log_event member functions +**************************************************************************/ + +/* + Constructor used to build an event for writing to the binary log. + Mats says tbl->s lives longer than this event so it's ok to copy pointers + (tbl->s->db etc) and not pointer content. + */ +#if !defined(MYSQL_CLIENT) +Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, + bool is_transactional, uint16 flags) + : Log_event(thd, 0, is_transactional), + m_table(tbl), + m_dbnam(tbl->s->db.str), + m_dblen(m_dbnam ? tbl->s->db.length : 0), + m_tblnam(tbl->s->table_name.str), + m_tbllen(tbl->s->table_name.length), + m_colcnt(tbl->s->fields), m_coltype(0), + m_table_id(tid), + m_flags(flags) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + /* + In TABLE_SHARE, "db" and "table_name" are 0-terminated (see this comment in + table.cc / alloc_table_share(): + Use the fact the key is db/0/table_name/0 + As we rely on this let's assert it. 
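+
+    (For example, for table test.t1 the share key is the eight bytes
+    "test\0t1\0", so db.str[db.length] and table_name.str[table_name.length]
+    are both '\0', which is exactly what the assertions below check.)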
+   */
+  DBUG_ASSERT((tbl->s->db.str == 0) ||
+              (tbl->s->db.str[tbl->s->db.length] == 0));
+  DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0);
+
+
+  m_data_size=  TABLE_MAP_HEADER_LEN;
+  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;);
+  m_data_size+= m_dblen + 2;    // Include length and terminating \0
+  m_data_size+= m_tbllen + 2;   // Include length and terminating \0
+  m_data_size+= 1 + m_colcnt;   // COLCNT and column types
+
+  /* If malloc fails, this is caught in is_valid() */
+  if ((m_memory= my_malloc(m_colcnt, MYF(MY_WME))))
+  {
+    m_coltype= reinterpret_cast<unsigned char*>(m_memory);
+    for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
+      m_coltype[i]= m_table->field[i]->type();
+  }
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#if defined(HAVE_REPLICATION)
+Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
+                                         const Format_description_log_event
+                                         *description_event)
+
+  : Log_event(buf, description_event),
+#ifndef MYSQL_CLIENT
+    m_table(NULL),
+#endif
+    m_memory(NULL)
+{
+  DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)");
+
+  uint8 common_header_len= description_event->common_header_len;
+  uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1];
+  DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d",
+                     event_len, common_header_len, post_header_len));
+
+  DBUG_DUMP("event buffer", buf, event_len);
+
+  /* Read the post-header */
+  const char *post_start= buf + common_header_len;
+
+  post_start+= TM_MAPID_OFFSET;
+  if (post_header_len == 6)
+  {
+    /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */
+    m_table_id= uint4korr(post_start);
+    post_start+= 4;
+  }
+  else
+  {
+    DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN);
+    m_table_id= uint6korr(post_start);
+    post_start+= TM_FLAGS_OFFSET;
+  }
+
+  DBUG_ASSERT(m_table_id != ULONG_MAX);
+
+  m_flags= uint2korr(post_start);
+
+  /* Read the variable part of the event */
+  const char *const vpart= buf + common_header_len + post_header_len;
+
+  /* Extract the length of the various parts from the buffer */
+  byte const* const ptr_dblen= (byte const*)vpart + 0;
+  m_dblen= *(unsigned char*) ptr_dblen;
+
+  /* Length of database name + counter + terminating null */
+  byte const* const ptr_tbllen= ptr_dblen + m_dblen + 2;
+  m_tbllen= *(unsigned char*) ptr_tbllen;
+
+  /* Length of table name + counter + terminating null */
+  byte const* const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
+  uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
+  m_colcnt= net_field_length(&ptr_after_colcnt);
+
+  DBUG_PRINT("info",("m_dblen=%d off=%d m_tbllen=%d off=%d m_colcnt=%d off=%d",
+                     m_dblen, ptr_dblen-(const byte*)vpart,
+                     m_tbllen, ptr_tbllen-(const byte*)vpart,
+                     m_colcnt, ptr_colcnt-(const byte*)vpart));
+
+  /* Allocate mem for all fields in one go.
     If it fails, this is caught in is_valid(). */
+  m_memory= my_multi_malloc(MYF(MY_WME),
+                            &m_dbnam, m_dblen + 1,
+                            &m_tblnam, m_tbllen + 1,
+                            &m_coltype, m_colcnt,
+                            NULL);
+
+  if (m_memory)
+  {
+    /* Copy the different parts into their memory */
+    strncpy(const_cast<char*>(m_dbnam), (const char*)ptr_dblen + 1, m_dblen + 1);
+    strncpy(const_cast<char*>(m_tblnam), (const char*)ptr_tbllen + 1, m_tbllen + 1);
+    memcpy(m_coltype, ptr_after_colcnt, m_colcnt);
+  }
+
+  DBUG_VOID_RETURN;
+}
+#endif
+
+Table_map_log_event::~Table_map_log_event()
+{
+  my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+/*
+  Find a table based on database name and table name.
+
+  DESCRIPTION
+
+    Currently, only the first table of the 'table_list' is located.  If the
+    table is found in the list of open tables for the thread, the 'table'
+    field of 'table_list' is filled in.
+
+  PARAMETERS
+
+    thd        Thread structure
+    table_list List of tables to locate in the thd->open_tables list.
+    count      Pointer to a variable that will be set to the number of
+               tables found.  If the pointer is NULL, nothing will be stored.
+
+  RETURN VALUE
+
+    The number of tables found.
+
+  TO DO
+
+    Replace the list of table searches with a hash based on the combined
+    database and table name.  The handler_tables_hash is inappropriate since
+    it hashes on the table alias.  At the same time, the function can be
+    extended to handle a full list of table names, in the same spirit as
+    open_tables() and lock_tables().
+*/
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+static uint find_tables(THD *thd, TABLE_LIST *table_list, uint *count)
+{
+  uint result= 0;
+
+  /* We verify that the caller knows our limitation. */
+  DBUG_ASSERT(table_list->next_global == 0);
+  for (TABLE *table= thd->open_tables; table ; table= table->next)
+  {
+    if (strcmp(table->s->db.str, table_list->db) == 0
+        && strcmp(table->s->table_name.str, table_list->table_name) == 0)
+    {
+      /* Copy the table pointer into the table list. */
+      table_list->table= table;
+      result= 1;
+      break;
+    }
+  }
+
+  if (count)
+    *count= result;
+  return result;
+}
+#endif
+
+/*
+  Return value is an error code, one of:
+
+      -1     Failure to open table   [from open_tables()]
+       0     Success
+       1     No room for more tables [from set_table()]
+       2     Out of memory           [from set_table()]
+       3     Wrong table definition
+       4     Daisy-chaining RBR with SBR not possible
+ */
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Table_map_log_event::exec_event(st_relay_log_info *rli)
+{
+  DBUG_ENTER("Table_map_log_event::exec_event(st_relay_log_info*)");
+
+  DBUG_ASSERT(rli->sql_thd == thd);
+
+  /* Step the query id to mark what columns are actually used. */
+  pthread_mutex_lock(&LOCK_thread_count);
+  thd->query_id= next_query_id();
+  pthread_mutex_unlock(&LOCK_thread_count);
+
+  TABLE_LIST table_list;
+  uint32 dummy_len;
+  bzero(&table_list, sizeof(table_list));
+  table_list.db= const_cast<char *>
+    (rpl_filter->get_rewrite_db(m_dbnam, &dummy_len));
+  table_list.alias= table_list.table_name= const_cast<char*>(m_tblnam);
+  table_list.lock_type= TL_WRITE;
+  table_list.next_global= table_list.next_local= 0;
+  table_list.updating= 1;
+
+  int error= 0;
+
+  if (rpl_filter->db_ok(table_list.db) &&
+      (!rpl_filter->is_on() || rpl_filter->tables_ok("", &table_list)))
+  {
+    /*
+      TODO: Mats will soon change this test below so that an SBR slave
+      always accepts RBR events from the master (and binlogs them RBR).
+    */
+    /*
+      Check if the slave is set to use SBR.  If so, the slave should
+      stop immediately since it is not possible to daisy-chain from
+      RBR to SBR.  Once RBR is used, the rest of the chain has to use
+      RBR.
+    */
+    if (mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG) &&
+        !thd->current_stmt_binlog_row_based)
+    {
+      slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_RBR_TO_SBR,
+                      "It is not possible to use statement-based binlogging "
+                      "on a slave that replicates row-based. Please use "
+                      "--binlog-format=row on the slave if you want to use "
+                      "--log-slave-updates and read row-based binlog events.");
+      DBUG_RETURN(ERR_RBR_TO_SBR);
+    }
+
+    /*
+      Open the table if it is not already open, and add the table to the
+      table map.  If the table should not be replicated, we don't bother to
+      do anything: the table map will return NULL and the row-level event
+      will effectively be a no-op.
+    */
+    uint count;
+    if (find_tables(thd, &table_list, &count) == 0)
+    {
+      /*
+        open_tables() reads the contents of thd->lex, so they must be
+        initialized, so we should call lex_start(); to be even safer, we
+        call mysql_init_query(), which does a more complete set of inits.
+      */
+      mysql_init_query(thd, NULL, 0);
+      TABLE_LIST *tables= &table_list;
+      if ((error= open_tables(thd, &tables, &count, 0)))
+      {
+        if (thd->query_error || thd->is_fatal_error)
+        {
+          /*
+            Error reporting borrowed from Query_log_event with many excessive
+            simplifications (we don't honour --slave-skip-errors).
+          */
+          uint actual_error= thd->net.last_errno;
+          slave_print_msg(ERROR_LEVEL, rli, actual_error,
+                          "Error '%s' on opening table `%s`.`%s`",
+                          (actual_error ? thd->net.last_error :
+                           "unexpected success or fatal error"),
+                          table_list.db, table_list.table_name);
+          thd->query_error= 1;
+        }
+        DBUG_RETURN(error);
+      }
+    }
+
+    m_table= table_list.table;
+
+    /*
+      This would fail later otherwise: the 'in_use' field should be
+      set to the current thread.
+    */
+    DBUG_ASSERT(m_table->in_use);
+
+    /*
+      Check that the number of columns and the field types in the
+      event match the number of columns and field types in the opened
+      table.
+    */
+    uint col= m_table->s->fields;
+
+    if (col == m_colcnt)
+    {
+      while (col-- > 0)
+        if (m_table->field[col]->type() != m_coltype[col])
+          break;
+    }
+
+    TABLE_SHARE const *const tsh= m_table->s;
+
+    /*
+      Check the following termination conditions:
+
+        (col == m_table->s->fields)
+            ==> (m_table->s->fields != m_colcnt)
+        (0 <= col < m_table->s->fields)
+            ==> (m_table->field[col]->type() != m_coltype[col])
+
+      Logically, A ==> B is equivalent to !A || B.
+
+      Since col is unsigned, it suffices to check that col <=
+      tsh->fields.  If col wrapped (by decreasing col when it is 0),
+      the number will be UINT_MAX, which is greater than tsh->fields.
+    */
+    DBUG_ASSERT(!(col == tsh->fields) || tsh->fields != m_colcnt);
+    DBUG_ASSERT(!(col < tsh->fields) ||
+                (m_table->field[col]->type() != m_coltype[col]));
+
+    if (col <= tsh->fields)
+    {
+      /*
+        If we get here, the number of columns in the event didn't
+        match the number of columns in the table on the slave, *or*
+        there was a column in the table on the slave that did not
+        have the same type as given in the event.
+
+        If 'col' still has the value it was initialized with, the
+        mismatch is between the number of columns on the master and
+        the slave.
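+
+        Worked example, assuming three columns on the slave: if every type
+        matches, the final col-- wraps col around to UINT_MAX (> 3) and no
+        error is raised; if field[1] mismatches, the loop breaks with
+        col == 1 (< 3); if m_colcnt != 3, the loop is never entered and
+        col == 3.  The single test col <= tsh->fields below thus catches
+        exactly the two error cases.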
+ */ + if (col == tsh->fields) + { + DBUG_ASSERT(tsh->db.str && tsh->table_name.str); + slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF, + "Table width mismatch - " + "received %u columns, %s.%s has %u columns", + m_colcnt, tsh->db.str, tsh->table_name.str, tsh->fields); + } + else + { + DBUG_ASSERT(col < m_colcnt && col < tsh->fields); + DBUG_ASSERT(tsh->db.str && tsh->table_name.str); + slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF, + "Column %d type mismatch - " + "received type %d, %s.%s has type %d", + col, m_coltype[col], tsh->db.str, tsh->table_name.str, + m_table->field[col]->type()); + } + + thd->query_error= 1; + DBUG_RETURN(ERR_BAD_TABLE_DEF); + } + + /* + We record in the slave's information that the number m_table_id is + mapped to the m_table object + */ + if (!error) + error= rli->m_table_map.set_table(m_table_id, m_table); + + /* + Tell the RLI that we are touching a table. + + TODO: Maybe we can combine this with the previous operation? + */ + if (!error) + rli->touching_table(m_dbnam, m_tblnam, m_table_id); + } + + /* + We explicitly do not call Log_event::exec_event() here since we do not + want the relay log position to be flushed to disk. The flushing will be + done by the last Rows_log_event that either ends a statement (outside a + transaction) or a transaction. + + A table map event can *never* end a transaction or a statement, so we + just step the relay log position. + */ + + if (likely(!error)) + rli->inc_event_relay_log_pos(); + + DBUG_RETURN(error); +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifndef MYSQL_CLIENT +bool Table_map_log_event::write_data_header(IO_CACHE *file) +{ + DBUG_ASSERT(m_table_id != ULONG_MAX); + byte buf[TABLE_MAP_HEADER_LEN]; + DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", + { + int4store(buf + 0, m_table_id); + int2store(buf + 4, m_flags); + return (my_b_safe_write(file, buf, 6)); + }); + int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id); + int2store(buf + TM_FLAGS_OFFSET, m_flags); + return (my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN)); +} + +bool Table_map_log_event::write_data_body(IO_CACHE *file) +{ + DBUG_ASSERT(m_dbnam != NULL); + DBUG_ASSERT(m_tblnam != NULL); + /* We use only one byte per length for storage in event: */ + DBUG_ASSERT(m_dblen < 128); + DBUG_ASSERT(m_tbllen < 128); + + byte const dbuf[]= { m_dblen }; + byte const tbuf[]= { m_tbllen }; + + char cbuf[sizeof(m_colcnt)]; + char *const cbuf_end= net_store_length((char*) cbuf, (uint) m_colcnt); + DBUG_ASSERT(static_cast<my_size_t>(cbuf_end - cbuf) <= sizeof(cbuf)); + + return (my_b_safe_write(file, dbuf, sizeof(dbuf)) || + my_b_safe_write(file, (const byte*)m_dbnam, m_dblen+1) || + my_b_safe_write(file, tbuf, sizeof(tbuf)) || + my_b_safe_write(file, (const byte*)m_tblnam, m_tbllen+1) || + my_b_safe_write(file, reinterpret_cast<byte*>(cbuf), + cbuf_end - (char*) cbuf) || + my_b_safe_write(file, reinterpret_cast<byte*>(m_coltype), m_colcnt)); + } +#endif + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) + +/* + Print some useful information for the SHOW BINARY LOG information + field. 
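+
+  For a map of table test.t1, for example, the Info column shown by
+  SHOW BINLOG EVENTS will simply read "test.t1".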
+ */
+
+void Table_map_log_event::pack_info(Protocol *protocol)
+{
+  char buf[256];
+  my_size_t bytes= my_snprintf(buf, sizeof(buf), "%s.%s", m_dbnam, m_tblnam);
+  protocol->store(buf, bytes, &my_charset_bin);
+}
+
+#endif
+
+
+#ifdef MYSQL_CLIENT
+void Table_map_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info)
+{
+  if (!print_event_info->short_form)
+  {
+    print_header(file, print_event_info);
+    fprintf(file, "\tTable_map: `%s`.`%s` mapped to number %lu\n",
+            m_dbnam, m_tblnam, m_table_id);
+    print_base64(file, print_event_info);
+  }
+}
+#endif
+
+/**************************************************************************
+	Write_rows_log_event member functions
+**************************************************************************/
+
+/*
+  Constructor used to build an event for writing to the binary log.
+ */
+#if !defined(MYSQL_CLIENT)
+Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+                                           ulong tid_arg,
+                                           MY_BITMAP const *cols,
+                                           bool is_transactional)
+  : Rows_log_event(thd_arg, tbl_arg, tid_arg, cols, is_transactional)
+{
+}
+#endif
+
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Write_rows_log_event::Write_rows_log_event(const char *buf, uint event_len,
+                                           const Format_description_log_event
+                                           *description_event)
+: Rows_log_event(buf, event_len, WRITE_ROWS_EVENT, description_event)
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Write_rows_log_event::do_before_row_operations(TABLE *table)
+{
+  int error= 0;
+
+  /*
+    We are using REPLACE semantics and not INSERT IGNORE semantics
+    when writing rows, that is: new rows replace old rows.  We need to
+    inform the storage engine that it should use this behaviour.
+  */
+
+  /* Tell the storage engine that we are using REPLACE semantics. */
+  thd->lex->duplicates= DUP_REPLACE;
+
+  /*
+    Pretend we're executing a REPLACE command: this is needed for
+    InnoDB and NDB Cluster since they are not (properly) checking the
+    lex->duplicates flag.
+  */
+  thd->lex->sql_command= SQLCOM_REPLACE;
+
+  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);  // Needed for ndbcluster
+  table->file->extra(HA_EXTRA_IGNORE_NO_KEY);   // Needed for ndbcluster
+  /*
+    TODO: the cluster team (Tomas?) says that it's better if the engine
+    knows how many rows are going to be inserted, so that it can allocate
+    the needed memory from the start.
+  */
+  table->file->start_bulk_insert(0);
+  /*
+    We need TIMESTAMP_NO_AUTO_SET, otherwise ha_write_row() will not fill
+    any TIMESTAMP column with data from the row but instead will use
+    the event's current time.
+    As we replicate from TIMESTAMP to TIMESTAMP and the slave has no extra
+    columns, we know that all TIMESTAMP columns on the slave will receive
+    explicit data from the row, so TIMESTAMP_NO_AUTO_SET is ok.
+    When we allow a table without TIMESTAMP to be replicated to a table
+    having more columns including a TIMESTAMP column, or when we allow a
+    TIMESTAMP column to be replicated into a BIGINT column and the slave's
+    table has a TIMESTAMP column, then the slave's TIMESTAMP column will
+    take its value from set_time() which we called earlier (consistent with
+    SBR).  And then in some cases we won't want TIMESTAMP_NO_AUTO_SET (this
+    will require some code to analyze if explicit data is provided for the
+    slave's TIMESTAMP columns).
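+
+    For example, if the master writes the row (1, '2005-01-01 00:00:00')
+    into t1(a INT, ts TIMESTAMP), the slave must store that very ts value,
+    not the time at which the slave happens to apply the event.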
+  */
+  table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+  return error;
+}
+
+int Write_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+  if (error == 0)
+    error= table->file->end_bulk_insert();
+  return error;
+}
+
+char const *Write_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+                                                 char const *row_start)
+{
+  char const *ptr= row_start;
+  DBUG_ASSERT(table != NULL);
+  /*
+    This assertion actually checks that there are at least as many
+    columns on the slave as on the master.
+  */
+  DBUG_ASSERT(table->s->fields >= m_width);
+  DBUG_ASSERT(ptr);
+  ptr= unpack_row(table, (byte*)table->record[0], ptr, &m_cols);
+  return ptr;
+}
+
+/*
+  Check if there are any more UNIQUE keys after the given key.
+  Returns 0 if there are, 1 if this was the last one.
+*/
+static int
+last_uniq_key(TABLE *table, uint keyno)
+{
+  while (++keyno < table->s->keys)
+    if (table->key_info[keyno].flags & HA_NOSAME)
+      return 0;
+  return 1;
+}
+
+/* Anonymous namespace for template functions/classes */
+namespace {
+
+  /*
+    Smart pointer that will automatically call my_afree (a macro) when
+    the pointer goes out of scope.  This is used so that I do not have
+    to remember to call my_afree() before each return.  There is no
+    overhead associated with this, since all functions are inline.
+
+    I (Matz) would prefer to use the free function as a template
+    parameter, but that is not possible when the "function" is a
+    macro.
+  */
+  template <class Obj>
+  class auto_afree_ptr
+  {
+    Obj* m_ptr;
+  public:
+    auto_afree_ptr(Obj* ptr) : m_ptr(ptr) { }
+    ~auto_afree_ptr() { if (m_ptr) my_afree(m_ptr); }
+    void assign(Obj* ptr) {
+      /* Only to be called if it hasn't been given a value before. */
+      DBUG_ASSERT(m_ptr == NULL);
+      m_ptr= ptr;
+    }
+    Obj* get() { return m_ptr; }
+  };
+
+}
+
+
+/*
+  Replace the provided record in the database.
+
+  Similar to how it is done in <code>mysql_insert()</code>, we first
+  try to do a <code>ha_write_row()</code> and if that fails due to
+  duplicated keys (or indices), we do an <code>ha_update_row()</code>
+  or a <code>ha_delete_row()</code> instead.
+
+  @param thd    Thread context for writing the record.
+  @param table  Table to which record should be written.
+
+  @return Error code on failure, 0 on success.
+ */
+static int
+replace_record(THD *thd, TABLE *table)
+{
+  DBUG_ASSERT(table != NULL && thd != NULL);
+
+  int error;
+  int keynum;
+  auto_afree_ptr<char> key(NULL);
+
+  while ((error= table->file->ha_write_row(table->record[0])))
+  {
+    if ((keynum= table->file->get_dup_key(error)) < 0)
+    {
+      /* We failed to retrieve the duplicate key */
+      return HA_ERR_FOUND_DUPP_KEY;
+    }
+
+    /*
+      We need to retrieve the old row into record[1] to be able to
+      either update or delete the offending record.  We either:
+
+      - use rnd_pos() with a row-id (available as dupp_ref) to the
+        offending row, if that is possible (MyISAM and Blackhole), or else
+
+      - use index_read_idx() with the key that is duplicated, to
+        retrieve the offending row.
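+
+      In outline, the loop below is therefore (a sketch, not the exact
+      code):
+
+        while (ha_write_row(record[0]) gives a duplicate-key error)
+        {
+          fetch the conflicting row into record[1];
+          if (last unique key && no foreign keys reference the table)
+            return ha_update_row(record[1], record[0]);
+          ha_delete_row(record[1]);      /* then retry the write */
+        }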
+    */
+    if (table->file->table_flags() & HA_DUPP_POS)
+    {
+      error= table->file->rnd_pos(table->record[1], table->file->dupp_ref);
+      if (error)
+        return error;
+    }
+    else
+    {
+      if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
+      {
+        return my_errno;
+      }
+
+      if (key.get() == NULL)
+      {
+        key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
+        if (key.get() == NULL)
+          return ENOMEM;
+      }
+
+      key_copy((byte*)key.get(), table->record[0], table->key_info + keynum, 0);
+      error= table->file->index_read_idx(table->record[1], keynum,
+                                         (const byte*)key.get(),
+                                         table->key_info[keynum].key_length,
+                                         HA_READ_KEY_EXACT);
+      if (error)
+        return error;
+    }
+
+    /*
+      Now, table->record[1] should contain the offending row.  That
+      will enable us to update it or, alternatively, delete it (so
+      that we can insert the new row afterwards).
+
+      REPLACE is defined as either INSERT or DELETE + INSERT.  If
+      possible, we can replace it with an UPDATE, but that will not
+      work on InnoDB if FOREIGN KEY checks are necessary.
+
+      I (Matz) am not sure of the reason for the last_uniq_key()
+      check, but I'm guessing that it's something along the
+      following lines.
+
+      Suppose that we got the duplicate key to be a key that is not
+      the last unique key for the table and we perform an update:
+      then there might be another key for which the unique check will
+      fail, so we're better off just deleting the row and inserting
+      the correct row.
+    */
+    if (last_uniq_key(table, keynum) &&
+        !table->file->referenced_by_foreign_key())
+    {
+      error= table->file->ha_update_row(table->record[1],
+                                        table->record[0]);
+      return error;
+    }
+    else
+    {
+      if ((error= table->file->ha_delete_row(table->record[1])))
+        return error;
+      /* Will retry ha_write_row() with the offending row removed. */
+    }
+  }
+  return error;
+}
+
+int Write_rows_log_event::do_exec_row(TABLE *table)
+{
+  DBUG_ASSERT(table != NULL);
+  int error= replace_record(thd, table);
+  return error;
+}
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info)
+{
+  if (!print_event_info->short_form)
+  {
+    print_header(file, print_event_info);
+    fprintf(file, "\tWrite_rows: table id %lu", m_table_id);
+    print_base64(file, print_event_info);
+  }
+}
+#endif
+
+/**************************************************************************
+	Delete_rows_log_event member functions
+**************************************************************************/
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+/*
+  Compares table->record[0] and table->record[1]
+
+  Returns TRUE if different.
+*/
+static bool record_compare(TABLE *table)
+{
+  if (table->s->blob_fields + table->s->varchar_fields == 0)
+    return cmp_record(table,record[1]);
+  /* Compare null bits */
+  if (memcmp(table->null_flags,
+             table->null_flags+table->s->rec_buff_length,
+             table->s->null_bytes))
+    return TRUE;                                // Diff in NULL value
+  /* Compare updated fields */
+  for (Field **ptr=table->field ; *ptr ; ptr++)
+  {
+    if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length))
+      return TRUE;
+  }
+  return FALSE;
+}
+
+
+/*
+  Find the row given by 'key', if the table has keys, or else use a table
+  scan to find (and fetch) the row.  If the engine allows random access of
+  the records, a combination of position() and rnd_pos() will be used.
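+
+  In order of preference, the three strategies implemented below are:
+
+    1. position() + rnd_pos(), if the engine allows random access and the
+       table has a primary key;
+    2. index_read_idx() on key 0, stepping with index_next() until
+       record_compare() finds a match;
+    3. a full table scan with rnd_next(), restarting at most once from the
+       top of the table (restart_count < 2).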
+ */
+static int find_and_fetch_row(TABLE *table, byte *key)
+{
+  DBUG_ENTER("find_and_fetch_row(TABLE *table, byte *key, byte *record)");
+  DBUG_PRINT("enter", ("table=%p, key=%p, record=%p",
+                       table, key, table->record[1]));
+
+  DBUG_ASSERT(table->in_use != NULL);
+
+  if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS)
+      && table->s->primary_key < MAX_KEY)
+  {
+    /*
+      Use a more efficient method to fetch the record given by
+      table->record[0] if the engine allows it.  We first compute a
+      row reference using the position() member function (it will be
+      stored in table->file->ref) and then use rnd_pos() to position
+      the "cursor" at the correct row.
+    */
+    table->file->position(table->record[0]);
+    DBUG_RETURN(table->file->rnd_pos(table->record[0], table->file->ref));
+  }
+
+  DBUG_ASSERT(table->record[1]);
+
+  /* We need to retrieve all fields */
+  table->file->ha_set_all_bits_in_read_set();
+
+  if (table->s->keys > 0)
+  {
+    int error;
+    if ((error= table->file->index_read_idx(table->record[1], 0, key,
+                                            table->key_info->key_length,
+                                            HA_READ_KEY_EXACT)))
+    {
+      table->file->print_error(error, MYF(0));
+      DBUG_RETURN(error);
+    }
+
+    /*
+      Below is a minor "optimization".  If the key (i.e., key number
+      0) has the HA_NOSAME flag set, we know that we have found the
+      correct record (since there can be no duplicates); otherwise, we
+      have to compare the record with the one found to see if it is
+      the correct one.
+
+      CAVEAT! This behaviour is essential for the replication of,
+      e.g., the mysql.proc table since the correct record *shall* be
+      found using the primary key *only*.  There shall be no
+      comparison of non-PK columns to decide if the correct record is
+      found.  I can see no scenario where it would be incorrect to
+      choose the row to change using only a PK or a unique non-null
+      index (UNNI).
+    */
+    if (table->key_info->flags & HA_NOSAME)
+      DBUG_RETURN(0);
+
+    while (record_compare(table))
+    {
+      int error;
+      if ((error= table->file->index_next(table->record[1])))
+      {
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+  }
+  else
+  {
+    /* Continue until we find the right record or have made a full loop */
+    int restart_count= 0; // Number of times scanning has restarted from top
+    int error= 0;
+    do
+    {
+      error= table->file->rnd_next(table->record[1]);
+      switch (error)
+      {
+      case 0:
+      case HA_ERR_RECORD_DELETED:
+        break;
+
+      case HA_ERR_END_OF_FILE:
+        if (++restart_count < 2)
+          table->file->ha_rnd_init(1);
+        break;
+
+      default:
+        table->file->print_error(error, MYF(0));
+        DBUG_RETURN(error);
+      }
+    }
+    while (restart_count < 2 && record_compare(table));
+
+    DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0);
+    DBUG_RETURN(error);
+  }
+
+  DBUG_RETURN(0);
+}
+#endif
+
+/*
+  Constructor used to build an event for writing to the binary log.
+ */
+
+#ifndef MYSQL_CLIENT
+Delete_rows_log_event::Delete_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+                                             ulong tid, MY_BITMAP const *cols,
+                                             bool is_transactional)
+  : Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional)
+#ifdef HAVE_REPLICATION
+    ,m_memory(NULL), m_key(NULL), m_after_image(NULL)
+#endif
+{
+}
+#endif /* #if !defined(MYSQL_CLIENT) */
+
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Delete_rows_log_event::Delete_rows_log_event(const char *buf, uint event_len,
+                                             const Format_description_log_event
+                                             *description_event)
+#if defined(MYSQL_CLIENT)
+  : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event)
+#else
+  : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event),
+    m_memory(NULL), m_key(NULL), m_after_image(NULL)
+#endif
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Delete_rows_log_event::do_before_row_operations(TABLE *table)
+{
+  DBUG_ASSERT(m_memory == NULL);
+
+  if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) &&
+      table->s->primary_key < MAX_KEY)
+  {
+    /*
+      We don't need to allocate any memory for m_after_image and
+      m_key since they are not used.
+    */
+    return 0;
+  }
+
+  int error= 0;
+
+  if (table->s->keys > 0)
+  {
+    m_memory=
+      my_multi_malloc(MYF(MY_WME),
+                      &m_after_image, table->s->reclength,
+                      &m_key, table->key_info->key_length,
+                      NULL);
+  }
+  else
+  {
+    m_after_image= (byte*)my_malloc(table->s->reclength, MYF(MY_WME));
+    m_memory= (gptr)m_after_image;
+    m_key= NULL;
+  }
+  if (!m_memory)
+    return HA_ERR_OUT_OF_MEM;
+
+  if (table->s->keys > 0)
+  {
+    /* We have a key: search the table using the index */
+    if (!table->file->inited)
+      error= table->file->ha_index_init(0, FALSE);
+  }
+  else
+  {
+    /* We don't have a key: search the table using rnd_next() */
+    error= table->file->ha_rnd_init(1);
+  }
+
+  return error;
+}
+
+int Delete_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+  /*
+    TODO: find out what the error value should really be; this triggers
+    close_scan in ndb, returning an error?
+  */
+  table->file->ha_index_or_rnd_end();
+  my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc
+  m_memory= NULL;
+  m_after_image= NULL;
+  m_key= NULL;
+
+  return error;
+}
+
+char const *Delete_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+                                                  char const *row_start)
+{
+  char const *ptr= row_start;
+  DBUG_ASSERT(ptr);
+  /*
+    This assertion actually checks that there are at least as many
+    columns on the slave as on the master.
+  */
+  DBUG_ASSERT(table->s->fields >= m_width);
+
+  DBUG_ASSERT(ptr != NULL);
+  ptr= unpack_row(table, table->record[0], ptr, &m_cols);
+
+  /*
+    If we will access rows using the random access method, m_key will
+    be set to NULL, so we do not need to make a key copy in that case.
+  */
+  if (m_key)
+  {
+    KEY *const key_info= table->key_info;
+
+    key_copy(m_key, table->record[0], key_info, 0);
+  }
+
+  return ptr;
+}
+
+int Delete_rows_log_event::do_exec_row(TABLE *table)
+{
+  DBUG_ASSERT(table != NULL);
+
+  int error= find_and_fetch_row(table, m_key);
+  if (error)
+    return error;
+
+  /*
+    Now we should have the right row to delete.  We are using
+    record[0] since it is guaranteed to point to a record with the
+    correct value.
+  */
+  error= table->file->ha_delete_row(table->record[0]);
+
+  return error;
+}
+
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Delete_rows_log_event::print(FILE *file,
+                                  PRINT_EVENT_INFO* print_event_info)
+{
+  if (!print_event_info->short_form)
+  {
+    print_header(file, print_event_info);
+    fprintf(file, "\tDelete_rows: table id %lu", m_table_id);
+    print_base64(file, print_event_info);
+  }
+}
+#endif
+
+
+/**************************************************************************
+	Update_rows_log_event member functions
+**************************************************************************/
+
+/*
+  Constructor used to build an event for writing to the binary log.
+ */
+#if !defined(MYSQL_CLIENT)
+Update_rows_log_event::Update_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+                                             ulong tid, MY_BITMAP const *cols,
+                                             bool is_transactional)
+: Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional)
+#ifdef HAVE_REPLICATION
+  , m_memory(NULL), m_key(NULL)
+#endif
+{
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len,
+                                             const Format_description_log_event
+                                             *description_event)
+#if defined(MYSQL_CLIENT)
+  : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event)
+#else
+  : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event),
+    m_memory(NULL), m_key(NULL)
+#endif
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Update_rows_log_event::do_before_row_operations(TABLE *table)
+{
+  DBUG_ASSERT(m_memory == NULL);
+
+  int error= 0;
+
+  if (table->s->keys > 0)
+  {
+    m_memory=
+      my_multi_malloc(MYF(MY_WME),
+                      &m_after_image, table->s->reclength,
+                      &m_key, table->key_info->key_length,
+                      NULL);
+  }
+  else
+  {
+    m_after_image= (byte*)my_malloc(table->s->reclength, MYF(MY_WME));
+    m_memory= (gptr)m_after_image;
+    m_key= NULL;
+  }
+  if (!m_memory)
+    return HA_ERR_OUT_OF_MEM;
+
+  if (table->s->keys > 0)
+  {
+    /* We have a key: search the table using the index */
+    if (!table->file->inited)
+      error= table->file->ha_index_init(0, FALSE);
+  }
+  else
+  {
+    /* We don't have a key: search the table using rnd_next() */
+    error= table->file->ha_rnd_init(1);
+  }
+  table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+
+  return error;
+}
+
+int Update_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+  /*
+    TODO: find out what the error value should really be; this triggers
+    close_scan in ndb, returning an error?
+  */
+  table->file->ha_index_or_rnd_end();
+  my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR));
+  m_memory= NULL;
+  m_after_image= NULL;
+  m_key= NULL;
+
+  return error;
+}
+
+char const *Update_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+                                                  char const *row_start)
+{
+  char const *ptr= row_start;
+  DBUG_ASSERT(ptr);
+  /*
+    This assertion actually checks that there are at least as many
+    columns on the slave as on the master.
+  */
+  DBUG_ASSERT(table->s->fields >= m_width);
+
+  /* record[0] is the before image for the update */
+  ptr= unpack_row(table, table->record[0], ptr, &m_cols);
+  DBUG_ASSERT(ptr != NULL);
+  /* m_after_image is the after image for the update */
+  ptr= unpack_row(table, m_after_image, ptr, &m_cols);
+
+  /*
+    If we will access rows using the random access method, m_key will
+    be set to NULL, so we do not need to make a key copy in that case.
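+
+    For an UPDATE t SET a=2 WHERE a=1, for example, the row entry decodes
+    as the pair (before image: a=1, after image: a=2), unpacked above into
+    record[0] and m_after_image respectively.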
+ */ + if (m_key) + { + KEY *const key_info= table->key_info; + + key_copy(m_key, table->record[0], key_info, 0); + } + + return ptr; +} + +int Update_rows_log_event::do_exec_row(TABLE *table) +{ + DBUG_ASSERT(table != NULL); + + int error= find_and_fetch_row(table, m_key); + if (error) + return error; + + /* + This is only a precaution to make sure that the call to + ha_update_row is using record[1]. + + If this is not needed/required, then we could use m_after_image in + that call instead. + */ + bmove_align(table->record[1], m_after_image,(size_t) table->s->reclength); + + /* + Now we should have the right row to update. The record that has + been fetched is guaranteed to be in record[0], so we use that. + */ + error= table->file->ha_update_row(table->record[0], table->record[1]); + + return error; +} +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ + +#ifdef MYSQL_CLIENT +void Update_rows_log_event::print(FILE *file, + PRINT_EVENT_INFO* print_event_info) +{ + if (!print_event_info->short_form) + { + print_header(file, print_event_info); + fprintf(file, "\tUpdate_rows: table id %lu", m_table_id); + print_base64(file, print_event_info); + } +} +#endif + +#endif /* defined(HAVE_ROW_BASED_REPLICATION) */ diff --git a/sql/log_event.h b/sql/log_event.h index 0e1eb7cd13c..dd68e969ff1 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -26,6 +26,8 @@ #pragma interface /* gcc class implementation */ #endif +#include <my_bitmap.h> + #define LOG_READ_EOF -1 #define LOG_READ_BOGUS -2 #define LOG_READ_IO -3 @@ -197,6 +199,8 @@ struct sql_ex_info #define EXEC_LOAD_HEADER_LEN 4 #define DELETE_FILE_HEADER_LEN 4 #define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES) +#define ROWS_HEADER_LEN 8 +#define TABLE_MAP_HEADER_LEN 8 #define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1) #define EXECUTE_LOAD_QUERY_HEADER_LEN (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN) @@ -303,6 +307,14 @@ struct sql_ex_info /* DF = "Delete File" */ #define DF_FILE_ID_OFFSET 0 +/* TM = "Table Map" */ +#define TM_MAPID_OFFSET 0 +#define TM_FLAGS_OFFSET 6 + +/* RW = "RoWs" */ +#define RW_MAPID_OFFSET 0 +#define RW_FLAGS_OFFSET 6 + /* ELQ = "Execute Load Query" */ #define ELQ_FILE_ID_OFFSET QUERY_HEADER_LEN #define ELQ_FN_POS_START_OFFSET ELQ_FILE_ID_OFFSET + 4 @@ -374,6 +386,12 @@ struct sql_ex_info #define LOG_EVENT_SUPPRESS_USE_F 0x8 /* + The table map version internal to the log should be increased after + the event has been written to the binary log. + */ +#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10 + +/* OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the Format_description_log_event, so that if later we don't want @@ -428,6 +446,10 @@ enum Log_event_type XID_EVENT= 16, BEGIN_LOAD_QUERY_EVENT= 17, EXECUTE_LOAD_QUERY_EVENT= 18, + TABLE_MAP_EVENT = 19, + WRITE_ROWS_EVENT = 20, + UPDATE_ROWS_EVENT = 21, + DELETE_ROWS_EVENT = 22, /* Add new events here - right above this comment! 
@@ -505,6 +527,7 @@ typedef struct st_print_event_info /* Settings on how to print the events */ bool short_form; + bool base64_output; my_off_t hexdump_from; uint8 common_header_len; @@ -617,9 +640,10 @@ public: static Log_event* read_log_event(IO_CACHE* file, const Format_description_log_event *description_event); /* print*() functions are used by mysqlbinlog */ - virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0) = 0; + virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0; void print_timestamp(FILE* file, time_t *ts = 0); - void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info); + void print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif static void *operator new(size_t size) @@ -647,7 +671,7 @@ public: virtual Log_event_type get_type_code() = 0; virtual bool is_valid() const = 0; virtual bool is_artificial_event() { return 0; } - inline bool get_cache_stmt() { return cache_stmt; } + inline bool get_cache_stmt() const { return cache_stmt; } Log_event(const char* buf, const Format_description_log_event* description_event); virtual ~Log_event() { free_temp_buf();} void register_temp_buf(char* buf) { temp_buf = buf; } @@ -779,8 +803,8 @@ public: uint32 q_len_arg); #endif /* HAVE_REPLICATION */ #else - void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Query_log_event(const char* buf, uint event_len, @@ -834,7 +858,7 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Slave_log_event(const char* buf, uint event_len); @@ -922,7 +946,7 @@ public: bool use_rli_only_for_errors); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info = 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool commented); #endif @@ -1012,7 +1036,7 @@ public: #endif /* HAVE_REPLICATION */ #else Start_log_event_v3() {} - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Start_log_event_v3(const char* buf, @@ -1107,7 +1131,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Intvar_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1148,7 +1172,7 @@ class Rand_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Rand_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1185,7 +1209,7 @@ class Xid_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif 
Xid_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1196,6 +1220,9 @@ class Xid_log_event: public Log_event bool write(IO_CACHE* file); #endif bool is_valid() const { return 1; } +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + static my_bool show_xid; +#endif }; /***************************************************************************** @@ -1227,7 +1254,7 @@ public: void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif User_var_log_event(const char* buf, const Format_description_log_event* description_event); @@ -1253,7 +1280,7 @@ public: {} int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Stop_log_event(const char* buf, const Format_description_log_event* description_event): @@ -1283,7 +1310,7 @@ public: uint ident_len; uint flags; #ifndef MYSQL_CLIENT - Rotate_log_event(THD* thd_arg, const char* new_log_ident_arg, + Rotate_log_event(const char* new_log_ident_arg, uint ident_len_arg, ulonglong pos_arg, uint flags); #ifdef HAVE_REPLICATION @@ -1291,7 +1318,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Rotate_log_event(const char* buf, uint event_len, @@ -1344,7 +1371,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local); #endif @@ -1412,7 +1439,7 @@ public: virtual int get_create_or_append() const; #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Append_block_log_event(const char* buf, uint event_len, @@ -1423,8 +1450,8 @@ public: bool is_valid() const { return block != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1447,7 +1474,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local); #endif @@ -1459,8 +1486,8 @@ public: bool is_valid() const { return file_id != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1483,7 +1510,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); #endif Execute_load_log_event(const char* buf, uint event_len, @@ -1494,8 +1521,8 @@ public: bool is_valid() const { return file_id != 0; } #ifndef MYSQL_CLIENT bool write(IO_CACHE* file); -#endif const char* get_db() { return db; } +#endif }; @@ -1568,7 +1595,7 @@ public: int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, PRINT_EVENT_INFO* 
print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); /* Prints the query as LOAD DATA LOCAL and with rewritten filename */ void print(FILE* file, PRINT_EVENT_INFO* print_event_info, const char *local_fname); @@ -1600,10 +1627,526 @@ public: Log_event(buf, description_event) {} ~Unknown_log_event() {} - void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0); + void print(FILE* file, PRINT_EVENT_INFO* print_event_info); Log_event_type get_type_code() { return UNKNOWN_EVENT;} bool is_valid() const { return 1; } }; #endif char *str_to_hex(char *to, const char *from, uint len); + +/***************************************************************************** + + Table map log event class + + Create a mapping from a (database name, table name) couple to a table + identifier (an integer number). + + ****************************************************************************/ + +class Table_map_log_event : public Log_event +{ +public: + /* Constants */ + enum + { + TYPE_CODE = TABLE_MAP_EVENT + }; + + enum enum_error + { + ERR_OPEN_FAILURE = -1, /* Failure to open table */ + ERR_OK = 0, /* No error */ + ERR_TABLE_LIMIT_EXCEEDED = 1, /* No more room for tables */ + ERR_OUT_OF_MEM = 2, /* Out of memory */ + ERR_BAD_TABLE_DEF = 3, /* Table definition does not match */ + ERR_RBR_TO_SBR = 4 /* daisy-chanining RBR to SBR not allowed */ + }; + + enum enum_flag + { + /* + Nothing here right now, but the flags support is there in + preparation for changes that are coming. Need to add a + constant to make it compile under HP-UX: aCC does not like + empty enumerations. + */ + ENUM_FLAG_COUNT + }; + + typedef uint16 flag_set; + + /* Special constants representing sets of flags */ + enum + { + TM_NO_FLAGS = 0U + }; + + void set_flags(flag_set flag) { m_flags |= flag; } + void clear_flags(flag_set flag) { m_flags &= ~flag; } + flag_set get_flags(flag_set flag) const { return m_flags & flag; } + +#ifndef MYSQL_CLIENT + Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, + bool is_transactional, uint16 flags); +#endif +#ifdef HAVE_REPLICATION + Table_map_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + + ~Table_map_log_event(); + + virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; } + virtual bool is_valid() const { return m_memory != NULL; /* we check malloc */ } + + virtual int get_data_size() { return m_data_size; } +#ifndef MYSQL_CLIENT + virtual bool write_data_header(IO_CACHE *file); + virtual bool write_data_body(IO_CACHE *file); + virtual const char *get_db() { return m_dbnam; } +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + virtual int exec_event(struct st_relay_log_info *rli); + virtual void pack_info(Protocol *protocol); +#endif + +#ifdef MYSQL_CLIENT + virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + + +private: +#ifndef MYSQL_CLIENT + TABLE *m_table; +#endif + char const *m_dbnam; + my_size_t m_dblen; + char const *m_tblnam; + my_size_t m_tbllen; + ulong m_colcnt; + unsigned char *m_coltype; + + gptr m_memory; + ulong m_table_id; + flag_set m_flags; + + my_size_t m_data_size; +}; + + +/***************************************************************************** + + Row level log event class. + + Common base class for all row-level log events. + + RESPONSIBILITIES + + Encode the common parts of all events containing rows, which are: + - Write data header and data body to an IO_CACHE. 
+  - Provide an interface for adding an individual row to the event.
+
+  ****************************************************************************/
+
+class Rows_log_event : public Log_event
+{
+public:
+  /*
+    These definitions allow you to combine the flags into an
+    appropriate flag set using the normal bitwise operators.  The
+    implicit conversion from an enum-constant to an integer is
+    accepted by the compiler, which is then used to set the real set
+    of flags.
+  */
+
+  enum enum_flag
+  {
+    /* Last event of a statement */
+    STMT_END_F = (1U << 0),
+
+    /* Value of the OPTION_NO_FOREIGN_KEY_CHECKS flag in thd->options */
+    NO_FOREIGN_KEY_CHECKS_F = (1U << 1),
+
+    /* Value of the OPTION_RELAXED_UNIQUE_CHECKS flag in thd->options */
+    RELAXED_UNIQUE_CHECKS_F = (1U << 2)
+  };
+
+  typedef uint16 flag_set;
+
+  /* Special constants representing sets of flags */
+  enum
+  {
+      RLE_NO_FLAGS = 0U
+  };
+
+  virtual ~Rows_log_event();
+
+  void set_flags(flag_set flags) { m_flags |= flags; }
+  void clear_flags(flag_set flags) { m_flags &= ~flags; }
+  flag_set get_flags(flag_set flags) const { return m_flags & flags; }
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+  virtual int exec_event(struct st_relay_log_info *rli);
+#ifdef DBUG_RBR
+  virtual void pack_info(Protocol *protocol);
+#endif
+#endif
+
+#ifdef MYSQL_CLIENT
+  /* not for direct call, each derived has its own ::print() */
+  virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info)= 0;
+#endif
+
+#ifndef MYSQL_CLIENT
+  int add_row_data(byte *data, my_size_t length)
+  {
+    return do_add_row_data(data,length);
+  }
+#endif
+
+  /* Member functions to implement superclass interface */
+  virtual int get_data_size()
+  {
+    DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+                    return 6 + 1 + no_bytes_in_map(&m_cols) +
+                    (m_rows_cur - m_rows_buf););
+    return ROWS_HEADER_LEN + 1 + no_bytes_in_map(&m_cols) +
+      (m_rows_cur - m_rows_buf);
+  }
+
+  MY_BITMAP const *get_cols() const { return &m_cols; }
+  my_size_t get_width() const       { return m_width; }
+  ulong get_table_id() const        { return m_table_id; }
+
+#ifndef MYSQL_CLIENT
+  virtual bool write_data_header(IO_CACHE *file);
+  virtual bool write_data_body(IO_CACHE *file);
+  virtual const char *get_db() { return m_table->s->db.str; }
+#endif
+  virtual bool is_valid() const
+  {
+    /* that's how we check malloc() succeeded */
+    return m_rows_buf && m_cols.bitmap;
+  }
+
+  /*
+    If there is no table map active for the event, write one to the
+    binary log.
+
+    LOCK_log has to be acquired before calling this function.
+
+    PARAMETERS
+      thd - Thread to use when writing the table map
+
+    RETURN VALUE
+      Error code, or zero if write succeeded.
+  */
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+  int maybe_write_table_map(THD *thd, IO_CACHE *file, MYSQL_LOG *log) const
+  {
+    /*
+      N.B., get_cache_stmt() returns the value of 'using_trans' that
+      was provided to the constructor, i.e., get_cache_stmt() == true
+      if and only if the table is transactional.
+    */
+
+    int result= 0;
+    if (!log->is_table_mapped(m_table))
+      result= log->write_table_map(thd, file, m_table, get_cache_stmt());
+    return result;
+  }
+#endif
+
+protected:
+  /*
+    The constructors are protected since you're supposed to inherit
+    this class, not create instances of this class.
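+
+    The concrete subclasses are Write_rows_log_event, Update_rows_log_event
+    and Delete_rows_log_event; each supplies the do_before/do_after row
+    operations, do_prepare_row() and do_exec_row() primitives declared
+    below.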
+ */ +#ifndef MYSQL_CLIENT + Rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif + Rows_log_event(const char *row_data, uint event_len, + Log_event_type event_type, + const Format_description_log_event *description_event); + +#ifndef MYSQL_CLIENT + virtual int do_add_row_data(byte *data, my_size_t length); +#endif + +#ifndef MYSQL_CLIENT + TABLE *m_table; /* The table the rows belong to */ +#endif + ulong m_table_id; /* Table ID */ + MY_BITMAP m_cols; /* Bitmap denoting columns available */ + ulong m_width; /* The width of the columns bitmap */ + + /* Bit buffer in the same memory as the class */ + uint32 m_bitbuf[128/(sizeof(uint32)*8)]; + + byte *m_rows_buf; /* The rows in packed format */ + byte *m_rows_cur; /* One-after the end of the data */ + byte *m_rows_end; /* One-after the end of the allocated space */ + + flag_set m_flags; /* Flags for row-level events */ + +private: + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + /* + Primitive to prepare for a sequence of row executions. + + DESCRIPTION + + Before doing a sequence of do_prepare_row() and do_exec_row() + calls, this member function should be called to prepare for the + entire sequence. Typically, this member function will allocate + space for any buffers that are needed for the two member + functions mentioned above. + + RETURN VALUE + + The member function will return 0 if all went OK, or a non-zero + error code otherwise. + */ + virtual int do_before_row_operations(TABLE *table) = 0; + + /* + Primitive to clean up after a sequence of row executions. + + DESCRIPTION + + After doing a sequence of do_prepare_row() and do_exec_row(), + this member function should be called to clean up and release + any allocated buffers. + */ + virtual int do_after_row_operations(TABLE *table, int error) = 0; + + /* + Primitive to prepare for handling one row in a row-level event. + + DESCRIPTION + + The member function prepares for execution of operations needed for one + row in a row-level event by reading up data from the buffer containing + the row. No specific interpretation of the data is normally done here, + since SQL thread specific data is not available: that data is made + available for the do_exec function. + + RETURN VALUE + A pointer to the start of the next row, or NULL if the preparation + failed. Currently, preparation cannot fail, but don't rely on this + behavior. + */ + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start) = 0; + + /* + Primitive to do the actual execution necessary for a row. + + DESCRIPTION + The member function will do the actual execution needed to handle a row. + + RETURN VALUE + 0 if execution succeeded, 1 if execution failed. + + */ + virtual int do_exec_row(TABLE *table) = 0; +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + + +/***************************************************************************** + + Write row log event class + + Log row insertions and updates. The event contain several + insert/update rows for a table. Note that each event contains only + rows for one table. 
+ + ****************************************************************************/ +class Write_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = WRITE_ROWS_EVENT + }; + +#if !defined(MYSQL_CLIENT) + Write_rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif +#ifdef HAVE_REPLICATION + Write_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const byte *before_record + __attribute__((unused)), + const byte *after_record) + { + return thd->binlog_write_row(table, is_transactional, + cols, fields, after_record); + } +#endif + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + gptr m_memory; + byte *m_after_image; + + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start); + virtual int do_exec_row(TABLE *table); +#endif +}; + + +/***************************************************************************** + + Update rows log event class + + Log row updates with a before image. The event contain several + update rows for a table. Note that each event contains only rows for + one table. + + Also note that the row data consists of pairs of row data: one row + for the old data and one row for the new data. + + ****************************************************************************/ +class Update_rows_log_event : public Rows_log_event +{ +public: + enum + { + /* Support interface to THD::binlog_prepare_pending_rows_event */ + TYPE_CODE = UPDATE_ROWS_EVENT + }; + +#ifndef MYSQL_CLIENT + Update_rows_log_event(THD*, TABLE*, ulong table_id, + MY_BITMAP const *cols, bool is_transactional); +#endif + +#ifdef HAVE_REPLICATION + Update_rows_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION) + static bool binlog_row_logging_function(THD *thd, TABLE *table, + bool is_transactional, + MY_BITMAP *cols, + uint fields, + const byte *before_record, + const byte *after_record) + { + return thd->binlog_update_row(table, is_transactional, + cols, fields, before_record, after_record); + } +#endif + +private: + virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; } + +#ifdef MYSQL_CLIENT + void print(FILE *file, PRINT_EVENT_INFO *print_event_info); +#endif + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) + gptr m_memory; + byte *m_key; + byte *m_after_image; + + virtual int do_before_row_operations(TABLE *table); + virtual int do_after_row_operations(TABLE *table, int error); + virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start); + virtual int do_exec_row(TABLE *table); +#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */ +}; + +/***************************************************************************** + + Delete rows log event class. + + Log row deletions. 
+
+/*****************************************************************************
+
+  Delete rows log event class.
+
+  Log row deletions. The event contains several delete rows for a
+  table. Note that each event contains only rows for one table.
+
+  RESPONSIBILITIES
+
+    - Act as a container for rows that have been deleted on the master
+      and should be deleted on the slave.
+
+  COLLABORATION
+
+    Row_writer
+      Create the event and add rows to the event.
+    Row_reader
+      Extract the rows from the event.
+
+ ****************************************************************************/
+class Delete_rows_log_event : public Rows_log_event
+{
+public:
+  enum
+  {
+    /* Support interface to THD::binlog_prepare_pending_rows_event */
+    TYPE_CODE = DELETE_ROWS_EVENT
+  };
+
+#ifndef MYSQL_CLIENT
+  Delete_rows_log_event(THD*, TABLE*, ulong,
+                        MY_BITMAP const *cols, bool is_transactional);
+#endif
+#ifdef HAVE_REPLICATION
+  Delete_rows_log_event(const char *buf, uint event_len,
+                        const Format_description_log_event *description_event);
+#endif
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+  static bool binlog_row_logging_function(THD *thd, TABLE *table,
+                                          bool is_transactional,
+                                          MY_BITMAP *cols,
+                                          uint fields,
+                                          const byte *before_record,
+                                          const byte *after_record
+                                          __attribute__((unused)))
+  {
+    return thd->binlog_delete_row(table, is_transactional,
+                                  cols, fields, before_record);
+  }
+#endif
+
+private:
+  virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; }
+
+#ifdef MYSQL_CLIENT
+  void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+  gptr  m_memory;
+  byte *m_key;
+  byte *m_after_image;
+
+  virtual int do_before_row_operations(TABLE *table);
+  virtual int do_after_row_operations(TABLE *table, int error);
+  virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start);
+  virtual int do_exec_row(TABLE *table);
+#endif
+};
+
+
 #endif /* _log_event_h */
diff --git a/sql/my_decimal.cc b/sql/my_decimal.cc
index 1bd16940b47..89607129026 100644
--- a/sql/my_decimal.cc
+++ b/sql/my_decimal.cc
@@ -193,16 +193,23 @@ int str2my_decimal(uint mask, const char *from, uint length,
 #ifndef DBUG_OFF
 /* routines for debugging print */
 
+#define DIG_PER_DEC1 9
+#define ROUND_UP(X)  (((X)+DIG_PER_DEC1-1)/DIG_PER_DEC1)
+
 /* print decimal */
 void print_decimal(const my_decimal *dec)
 {
-  fprintf(DBUG_FILE,
-          "\nDecimal: sign: %d intg: %d frac: %d \n\
-%09d,%09d,%09d,%09d,%09d,%09d,%09d,%09d\n",
-          dec->sign(), dec->intg, dec->frac,
-          dec->buf[0], dec->buf[1], dec->buf[2], dec->buf[3],
-          dec->buf[4], dec->buf[5], dec->buf[6], dec->buf[7]);
+  int i, end;
+  char buff[512], *pos;
+  pos= buff;
+  pos+= my_sprintf(buff, (buff, "Decimal: sign: %d  intg: %d  frac: %d { ",
+                          dec->sign(), dec->intg, dec->frac));
+  end= ROUND_UP(dec->frac)+ROUND_UP(dec->intg)-1;
+  for (i=0; i < end; i++)
+    pos+= my_sprintf(pos, (pos, "%09d, ", dec->buf[i]));
+  pos+= my_sprintf(pos, (pos, "%09d }\n", dec->buf[i]));
+  fputs(buff, DBUG_FILE);
 }
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 2c817ae54c2..b3c3e4d9211 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -92,6 +92,15 @@ char* query_table_status(THD *thd,const char *db,const char *table_name);
 #define PREV_BITS(type,A) ((type) (((type) 1 << (A)) -1))
 #define all_bits_set(A,B) ((A) & (B) != (B))
 
+#define WARN_DEPRECATED(Thd,Ver,Old,New)                                   \
+  do {                                                                     \
+    DBUG_ASSERT(strncmp(Ver, MYSQL_SERVER_VERSION, sizeof(Ver)-1) >= 0);   \
+    push_warning_printf(((THD *)Thd), MYSQL_ERROR::WARN_LEVEL_WARN,        \
+                        ER_WARN_DEPRECATED, ER(ER_WARN_DEPRECATED),        \
+                        (Old), (Ver), (New));                              \
+  } while(0)
+
+
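The DBUG_ASSERT inside WARN_DEPRECATED deserves a note: it compares the announced removal version against MYSQL_SERVER_VERSION, so a debug build fails fast once a deprecated construct outlives its advertised lifetime. Below is a self-contained editorial imitation of that guard (toy macro and version string, not MySQL source); note that Ver must be a string literal for the sizeof trick to work.

// Editorial sketch — standalone imitation of the macro's version guard.
#include <cassert>
#include <cstring>
#include <cstdio>

#define MY_SERVER_VERSION "5.1.5"   /* stand-in for MYSQL_SERVER_VERSION */

#define TOY_WARN_DEPRECATED(Ver, Old, New)                               \
  do {                                                                   \
    /* Removal version must still be in the future for this server. */  \
    assert(strncmp(Ver, MY_SERVER_VERSION, sizeof(Ver)-1) >= 0);         \
    printf("Warning: '%s' is deprecated; use '%s' instead "              \
           "(to be removed in %s)\n", Old, New, Ver);                    \
  } while(0)

int main()
{
  TOY_WARN_DEPRECATED("5.2", "OLD SYNTAX", "NEW SYNTAX"); /* OK: 5.2 > 5.1.5 */
  /* TOY_WARN_DEPRECATED("5.0", "FOO", "BAR"); would assert: 5.0 already past */
  return 0;
}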
 extern CHARSET_INFO *system_charset_info, *files_charset_info ;
 extern CHARSET_INFO *national_charset_info, *table_alias_charset;
@@ -191,11 +200,6 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
 #define FLUSH_TIME          0      /* Don't flush tables */
 #define MAX_CONNECT_ERRORS  10     // errors before disabling host
 
-#ifdef HAVE_INNOBASE_DB
-#define IF_INNOBASE_DB(A, B) (A)
-#else
-#define IF_INNOBASE_DB(A, B) (B)
-#endif
 #ifdef __NETWARE__
 #define IF_NETWARE(A,B) (A)
 #else
@@ -253,51 +257,51 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
   TODO: separate three contexts above, move them to separate bitfields.
 */
-#define SELECT_DISTINCT          (1L << 0)     // SELECT, user
-#define SELECT_STRAIGHT_JOIN     (1L << 1)     // SELECT, user
-#define SELECT_DESCRIBE          (1L << 2)     // SELECT, user
-#define SELECT_SMALL_RESULT      (1L << 3)     // SELECT, user
-#define SELECT_BIG_RESULT        (1L << 4)     // SELECT, user
-#define OPTION_FOUND_ROWS        (1L << 5)     // SELECT, user
-#define OPTION_TO_QUERY_CACHE    (1L << 6)     // SELECT, user
-#define SELECT_NO_JOIN_CACHE     (1L << 7)     // intern
-#define OPTION_BIG_TABLES        (1L << 8)     // THD, user
-#define OPTION_BIG_SELECTS       (1L << 9)     // THD, user
-#define OPTION_LOG_OFF           (1L << 10)    // THD, user
-#define OPTION_UPDATE_LOG        (1L << 11)    // THD, user, unused
-#define TMP_TABLE_ALL_COLUMNS    (1L << 12)    // SELECT, intern
-#define OPTION_WARNINGS          (1L << 13)    // THD, user
-#define OPTION_AUTO_IS_NULL      (1L << 14)    // THD, user, binlog
-#define OPTION_FOUND_COMMENT     (1L << 15)    // SELECT, intern, parser
-#define OPTION_SAFE_UPDATES      (1L << 16)    // THD, user
-#define OPTION_BUFFER_RESULT     (1L << 17)    // SELECT, user
-#define OPTION_BIN_LOG           (1L << 18)    // THD, user
-#define OPTION_NOT_AUTOCOMMIT    (1L << 19)    // THD, user
-#define OPTION_BEGIN             (1L << 20)    // THD, intern
-#define OPTION_TABLE_LOCK        (1L << 21)    // THD, intern
-#define OPTION_QUICK             (1L << 22)    // SELECT (for DELETE)
-#define OPTION_QUOTE_SHOW_CREATE (1L << 23)    // THD, user
+#define SELECT_DISTINCT          (LL(1) << 0)  // SELECT, user
+#define SELECT_STRAIGHT_JOIN     (LL(1) << 1)  // SELECT, user
+#define SELECT_DESCRIBE          (LL(1) << 2)  // SELECT, user
+#define SELECT_SMALL_RESULT      (LL(1) << 3)  // SELECT, user
+#define SELECT_BIG_RESULT        (LL(1) << 4)  // SELECT, user
+#define OPTION_FOUND_ROWS        (LL(1) << 5)  // SELECT, user
+#define OPTION_TO_QUERY_CACHE    (LL(1) << 6)  // SELECT, user
+#define SELECT_NO_JOIN_CACHE     (LL(1) << 7)  // intern
+#define OPTION_BIG_TABLES        (LL(1) << 8)  // THD, user
+#define OPTION_BIG_SELECTS       (LL(1) << 9)  // THD, user
+#define OPTION_LOG_OFF           (LL(1) << 10) // THD, user
+#define OPTION_UPDATE_LOG        (LL(1) << 11) // THD, user, unused
+#define TMP_TABLE_ALL_COLUMNS    (LL(1) << 12) // SELECT, intern
+#define OPTION_WARNINGS          (LL(1) << 13) // THD, user
+#define OPTION_AUTO_IS_NULL      (LL(1) << 14) // THD, user, binlog
+#define OPTION_FOUND_COMMENT     (LL(1) << 15) // SELECT, intern, parser
+#define OPTION_SAFE_UPDATES      (LL(1) << 16) // THD, user
+#define OPTION_BUFFER_RESULT     (LL(1) << 17) // SELECT, user
+#define OPTION_BIN_LOG           (LL(1) << 18) // THD, user
+#define OPTION_NOT_AUTOCOMMIT    (LL(1) << 19) // THD, user
+#define OPTION_BEGIN             (LL(1) << 20) // THD, intern
+#define OPTION_TABLE_LOCK        (LL(1) << 21) // THD, intern
+#define OPTION_QUICK             (LL(1) << 22) // SELECT (for DELETE)
+#define OPTION_QUOTE_SHOW_CREATE (LL(1) << 23) // THD, user
 
 /*
   The following is used to detect a conflict with DISTINCT that the
   user query has requested.
 */
-#define SELECT_ALL               (1L << 24)    // SELECT, user, parser
+#define SELECT_ALL               (LL(1) << 24) // SELECT, user, parser
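The switch from 1L to LL(1) matters because these bits live in 64-bit option words (thd_startup_options is declared ulonglong later in this patch), while long is only 32 bits on many platforms: there, 1L << 31 is a negative 32-bit value whose sign bit smears across the upper half when widened. A standalone editorial demonstration of the failure mode, using fixed-width types:

// Editorial sketch — why the option bits need a 64-bit literal.
#include <cstdint>
#include <cstdio>

int main()
{
  /* Like 1L << 31 where long is 32 bits: the value is INT32_MIN. */
  int32_t  narrow= (int32_t)(UINT32_C(1) << 31);
  uint64_t options_bad=  (uint64_t)(int64_t)narrow;  /* sign-extends */
  uint64_t options_good= UINT64_C(1) << 31;          /* like LL(1) << 31 */

  printf("%016llx\n", (unsigned long long)options_bad);   /* ffffffff80000000 */
  printf("%016llx\n", (unsigned long long)options_good);  /* 0000000080000000 */
  return 0;
}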
 
 /* Set if we are updating a non-transaction safe table */
-#define OPTION_STATUS_NO_TRANS_UPDATE  (1L << 25)    // THD, intern
+#define OPTION_STATUS_NO_TRANS_UPDATE  (LL(1) << 25) // THD, intern
 
 /*
   The following can be set when importing tables in a 'wrong order'
   to suppress foreign key checks
 */
-#define OPTION_NO_FOREIGN_KEY_CHECKS   (1L << 26)    // THD, user, binlog
+#define OPTION_NO_FOREIGN_KEY_CHECKS   (LL(1) << 26) // THD, user, binlog
 /* The following speeds up inserts to InnoDB tables by suppressing unique
    key checks in some cases */
-#define OPTION_RELAXED_UNIQUE_CHECKS   (1L << 27)    // THD, user, binlog
-#define SELECT_NO_UNLOCK               (1L << 28)    // SELECT, intern
-#define OPTION_SCHEMA_TABLE            (1L << 29)    // SELECT, intern
+#define OPTION_RELAXED_UNIQUE_CHECKS   (LL(1) << 27) // THD, user, binlog
+#define SELECT_NO_UNLOCK               (LL(1) << 28) // SELECT, intern
+#define OPTION_SCHEMA_TABLE            (LL(1) << 29) // SELECT, intern
 /* Flag set if setup_tables already done */
-#define OPTION_SETUP_TABLES_DONE       (1L << 30)    // intern
+#define OPTION_SETUP_TABLES_DONE       (LL(1) << 30) // intern
 /* If not set then the thread will ignore all warnings with level notes. */
-#define OPTION_SQL_NOTES               (1UL << 31)   // THD, user
-/*
+#define OPTION_SQL_NOTES               (LL(1) << 31) // THD, user
+/*
   Force the used temporary table to be a MyISAM table (because we will use
   fulltext functions when reading from it).
 */
@@ -327,7 +331,7 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
 #define MODE_DB2                2048
 #define MODE_MAXDB              4096
 #define MODE_NO_KEY_OPTIONS     8192
-#define MODE_NO_TABLE_OPTIONS   16384 
+#define MODE_NO_TABLE_OPTIONS   16384
 #define MODE_NO_FIELD_OPTIONS   32768
 #define MODE_MYSQL323           65536
 #define MODE_MYSQL40            (MODE_MYSQL323*2)
@@ -414,6 +418,13 @@ void view_store_options(THD *thd, st_table_list *table, String *buff);
 
 #define STRING_BUFFER_USUAL_SIZE 80
 
+/*
+  Some defines for the return values of the ::is_equal class functions.
+*/ +#define IS_EQUAL_NO 0 +#define IS_EQUAL_YES 1 +#define IS_EQUAL_PACK_LENGTH 2 + enum enum_parsing_place { NO_MATTER, @@ -476,6 +487,11 @@ inline THD *_current_thd(void) } #define current_thd _current_thd() +/* below functions are required for plugins as THD class is opaque */ +my_bool thd_in_lock_tables(const THD *thd); +my_bool thd_tablespace_op(const THD *thd); +const char *thd_proc_info(THD *thd, const char *info); + /* External variables */ @@ -495,7 +511,10 @@ typedef my_bool (*qc_engine_callback)(THD *thd, char *table_key, #include "sql_error.h" #include "field.h" /* Field definitions */ #include "protocol.h" +#include "sql_plugin.h" #include "sql_udf.h" +#include "sql_partition.h" + class user_var_entry; class Security_context; enum enum_var_type @@ -505,19 +524,19 @@ enum enum_var_type class sys_var; #include "item.h" extern my_decimal decimal_zero; +#ifdef MYSQL_SERVER typedef Comp_creator* (*chooser_compare_func_creator)(bool invert); +#endif /* sql_parse.cc */ void free_items(Item *item); void cleanup_items(Item *item); class THD; void close_thread_tables(THD *thd, bool locked=0, bool skip_derived=0); -bool check_one_table_access(THD *thd, ulong privilege, - TABLE_LIST *tables); +bool check_one_table_access(THD *thd, ulong privilege, TABLE_LIST *tables); bool check_routine_access(THD *thd,ulong want_access,char *db,char *name, bool is_proc, bool no_errors); bool check_some_access(THD *thd, ulong want_access, TABLE_LIST *table); -bool check_merge_table_access(THD *thd, char *db, - TABLE_LIST *table_list); +bool check_merge_table_access(THD *thd, char *db, TABLE_LIST *table_list); bool check_some_routine_access(THD *thd, const char *db, const char *name, bool is_proc); bool multi_update_precheck(THD *thd, TABLE_LIST *tables); bool multi_delete_precheck(THD *thd, TABLE_LIST *tables); @@ -538,12 +557,14 @@ enum enum_mysql_completiontype { COMMIT_RELEASE=-1, COMMIT=0, COMMIT_AND_CHAIN=6 }; +bool begin_trans(THD *thd); int end_trans(THD *thd, enum enum_mysql_completiontype completion); Item *negate_expression(THD *thd, Item *expr); #include "sql_class.h" #include "sql_acl.h" #include "tztime.h" +#ifdef MYSQL_SERVER #include "opt_range.h" #ifdef HAVE_QUERY_CACHE @@ -594,10 +615,17 @@ struct Query_cache_query_flags #define query_cache_invalidate_by_MyISAM_filename_ref NULL #endif /*HAVE_QUERY_CACHE*/ +uint build_table_path(char *buff, size_t bufflen, const char *db, + const char *table, const char *ext); +void write_bin_log(THD *thd, bool clear_error, + char const *query, ulong query_length); + bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create, bool silent); bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create); bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent); +bool mysql_rename_db(THD *thd, LEX_STRING *old_db, LEX_STRING *new_db); void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ushort flags); +void mysql_client_binlog_statement(THD *thd); bool mysql_rm_table(THD *thd,TABLE_LIST *tables, my_bool if_exists, my_bool drop_temporary); int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, @@ -605,10 +633,10 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, int mysql_rm_table_part2_with_lock(THD *thd, TABLE_LIST *tables, bool if_exists, bool drop_temporary, bool log_query); -int quick_rm_table(enum db_type base,const char *db, - const char *table_name); +bool quick_rm_table(handlerton *base,const char *db, + const char *table_name); void close_cached_table(THD *thd, TABLE *table); -bool 
mysql_rename_tables(THD *thd, TABLE_LIST *table_list);
+bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent);
 bool mysql_change_db(THD *thd,const char *name,bool no_access_check);
 void mysql_parse(THD *thd,char *inBuf,uint length);
 bool mysql_test_parse_for_slave(THD *thd,char *inBuf,uint length);
@@ -634,10 +662,16 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
                       char* packet, uint packet_length);
 void log_slow_statement(THD *thd);
 bool check_dup(const char *db, const char *name, TABLE_LIST *tables);
+bool append_file_to_dir(THD *thd, const char **filename_ptr,
+                        const char *table_name);
 bool table_cache_init(void);
 void table_cache_free(void);
-uint cached_tables(void);
+bool table_def_init(void);
+void table_def_free(void);
+void assign_new_table_id(TABLE_SHARE *share);
+uint cached_open_tables(void);
+uint cached_table_definitions(void);
 void kill_mysql(void);
 void close_connection(THD *thd, uint errcode, bool lock);
 bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables,
@@ -648,6 +682,22 @@ bool check_table_access(THD *thd, ulong want_access, TABLE_LIST *tables,
                        bool no_errors);
 bool check_global_access(THD *thd, ulong want_access);
 
+/*
+  Support routine for SQL parser on partitioning syntax
+*/
+my_bool is_partition_management(LEX *lex);
+/*
+  General routine to change field->ptr of a NULL-terminated array of Field
+  objects. Useful when one needs to call val_int, val_str, or similar, and
+  the field data is not in table->record[0] but in some other structure.
+  set_key_field_ptr changes all fields of an index using a key_info object.
+  All methods presume that there is at least one field to change.
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf, const byte *old_buf);
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+                       const byte *old_buf);
+
 bool mysql_backup_table(THD* thd, TABLE_LIST* table_list);
 bool mysql_restore_table(THD* thd, TABLE_LIST* table_list);
 
@@ -671,6 +721,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list);
 bool mysql_xa_recover(THD *thd);
 bool check_simple_select();
+int mysql_alter_tablespace(THD* thd, st_alter_tablespace *ts_info);
 
 SORT_FIELD * make_unireg_sortorder(ORDER *order, uint *length);
 int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
@@ -733,7 +784,7 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, bool do_send_ok);
 bool mysql_create_like_table(THD *thd, TABLE_LIST *table,
                              HA_CREATE_INFO *create_info,
                              Table_ident *src_table);
-bool mysql_rename_table(enum db_type base,
+bool mysql_rename_table(handlerton *base,
                         const char *old_db,
                         const char * old_name,
                         const char *new_db,
@@ -769,15 +820,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
                   bool reset_auto_increment);
 bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok);
 bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create);
+uint create_table_def_key(THD *thd, char *key, TABLE_LIST *table_list,
+                          bool tmp_table);
+TABLE_SHARE *get_table_share(THD *thd, TABLE_LIST *table_list, char *key,
+                             uint key_length, uint db_flags, int *error);
+void release_table_share(TABLE_SHARE *share, enum release_type type);
+TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name);
 TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type update);
 TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT* mem,
                   bool *refresh, uint flags);
 bool reopen_name_locked_table(THD* thd, TABLE_LIST* table);
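The set_field_ptr()/set_key_field_ptr() contract described above amounts to rebasing each Field's data pointer from one record buffer into another, so val_int()/val_str() read the other buffer's bytes. A self-contained editorial sketch of that rebasing (toy types; not the server's implementation, which has more to handle than a plain offset):

// Editorial sketch — pointer rebasing over a NULL-terminated field array.
#include <cstddef>
#include <cstdio>

struct ToyField { const char *ptr; };

static void toy_set_field_ptr(ToyField **fields, const char *new_buf,
                              const char *old_buf)
{
  ptrdiff_t diff= new_buf - old_buf;
  for (ToyField **f= fields; *f; f++)
    (*f)->ptr+= diff;                 /* rebase into the other record buffer */
}

int main()
{
  const char rec0[]= "record-0 data";
  const char rec1[]= "record-1 data";
  ToyField a= { rec0 + 0 };
  ToyField b= { rec0 + 7 };
  ToyField *fields[]= { &a, &b, 0 };  /* NULL-terminated, as required above */

  toy_set_field_ptr(fields, rec1, rec0);
  printf("%c %c\n", *a.ptr, *b.ptr);  /* now reads from rec1: r 1 */
  return 0;
}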
TABLE *find_locked_table(THD *thd, const char *db,const char *table_name); -bool reopen_table(TABLE *table,bool locked); bool reopen_tables(THD *thd,bool get_locks,bool in_refresh); -void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, - bool send_refresh); bool close_data_tables(THD *thd,const char *db, const char *table_name); bool wait_for_tables(THD *thd); bool table_is_used(TABLE *table, bool wait_for_name_lock); @@ -807,6 +861,10 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, Field * find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, bool allow_rowid, uint *cached_field_index_ptr); +Field * +find_field_in_table_sef(TABLE *table, const char *name); + +#endif /* MYSQL_SERVER */ #ifdef HAVE_OPENSSL #include <openssl/des.h> @@ -825,6 +883,7 @@ extern pthread_mutex_t LOCK_des_key_file; bool load_des_key_file(const char *file_name); #endif /* HAVE_OPENSSL */ +#ifdef MYSQL_SERVER /* sql_do.cc */ bool mysql_do(THD *thd, List<Item> &values); @@ -848,6 +907,7 @@ int mysqld_show_variables(THD *thd,const char *wild); int mysql_find_files(THD *thd,List<char> *files, const char *db, const char *path, const char *wild, bool dir); bool mysqld_show_storage_engines(THD *thd); +bool mysqld_show_authors(THD *thd); bool mysqld_show_privileges(THD *thd); bool mysqld_show_column_types(THD *thd); bool mysqld_help (THD *thd, const char *text); @@ -856,9 +916,14 @@ void calc_sum_of_all_status(STATUS_VAR *to); void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, const LEX_STRING *definer_host); +int add_status_vars(SHOW_VAR *list); +void remove_status_vars(SHOW_VAR *list); +void init_status_vars(); +void free_status_vars(); /* information schema */ extern LEX_STRING information_schema_name; +extern const LEX_STRING partition_keywords[]; LEX_STRING *make_lex_string(THD *thd, LEX_STRING *lex_str, const char* str, uint length, bool allocate_lex_string); @@ -951,10 +1016,10 @@ bool setup_tables(THD *thd, Name_resolution_context *context, int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, List<Item> *sum_func_list, uint wild_num); bool setup_fields(THD *thd, Item** ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func); inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array, - List<Item> &item, bool set_query_id, + List<Item> &item, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { @@ -969,7 +1034,8 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, COND **conds); int setup_ftfuncs(SELECT_LEX* select); int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order); -void wait_for_refresh(THD *thd); +void wait_for_condition(THD *thd, pthread_mutex_t *mutex, + pthread_cond_t *cond); int open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags); int simple_open_n_lock_tables(THD *thd,TABLE_LIST *tables); bool open_and_lock_tables(THD *thd,TABLE_LIST *tables); @@ -977,7 +1043,7 @@ bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags); int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen); TABLE *open_temporary_table(THD *thd, const char *path, const char *db, const char *table_name, bool link_in_list); -bool rm_temporary_table(enum db_type base, char *path); +bool rm_temporary_table(handlerton *base, char *path); void free_io_cache(TABLE *entry); void intern_close_table(TABLE *entry); bool close_thread_table(THD *thd, TABLE 
**table_ptr); @@ -988,15 +1054,34 @@ TABLE_LIST *find_table_in_list(TABLE_LIST *table, const char *db_name, const char *table_name); TABLE_LIST *unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list); -TABLE **find_temporary_table(THD *thd, const char *db, const char *table_name); -bool close_temporary_table(THD *thd, const char *db, const char *table_name); -void close_temporary(TABLE *table, bool delete_table); +TABLE *find_temporary_table(THD *thd, const char *db, const char *table_name); +TABLE *find_temporary_table(THD *thd, TABLE_LIST *table_list); +bool close_temporary_table(THD *thd, TABLE_LIST *table_list); +void close_temporary_table(THD *thd, TABLE *table, bool free_share, + bool delete_table); +void close_temporary(TABLE *table, bool free_share, bool delete_table); bool rename_temporary_table(THD* thd, TABLE *table, const char *new_db, const char *table_name); void remove_db_from_cache(const char *db); void flush_tables(); bool is_equal(const LEX_STRING *a, const LEX_STRING *b); +#ifdef WITH_PARTITION_STORAGE_ENGINE +uint fast_alter_partition_table(THD *thd, TABLE *table, + ALTER_INFO *alter_info, + HA_CREATE_INFO *create_info, + TABLE_LIST *table_list, + List<create_field> *create_list, + List<Key> *key_list, const char *db, + const char *table_name, + uint fast_alter_partition); +uint prep_alter_part_table(THD *thd, TABLE *table, ALTER_INFO *alter_info, + HA_CREATE_INFO *create_info, + handlerton *old_db_type, + bool *partition_changed, + uint *fast_alter_partition); +#endif + /* bits for last argument to remove_table_from_cache() */ #define RTFC_NO_FLAG 0x0000 #define RTFC_OWNED_BY_THD_FLAG 0x0001 @@ -1005,7 +1090,37 @@ bool is_equal(const LEX_STRING *a, const LEX_STRING *b); bool remove_table_from_cache(THD *thd, const char *db, const char *table, uint flags); -bool close_cached_tables(THD *thd, bool wait_for_refresh, TABLE_LIST *tables); +typedef struct st_lock_param_type +{ + ulonglong copied; + ulonglong deleted; + THD *thd; + HA_CREATE_INFO *create_info; + List<create_field> *create_list; + List<create_field> new_create_list; + List<Key> *key_list; + List<Key> new_key_list; + TABLE *table; + KEY *key_info_buffer; + const char *db; + const char *table_name; + const void *pack_frm_data; + enum thr_lock_type old_lock_type; + uint key_count; + uint db_options; + uint pack_frm_len; +} ALTER_PARTITION_PARAM_TYPE; + +void mem_alloc_error(size_t size); +#define WFRM_INITIAL_WRITE 1 +#define WFRM_CREATE_HANDLER_FILES 2 +#define WFRM_PACK_FRM 4 +bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags); +bool abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt); +void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt); +void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table); + +bool close_cached_tables(THD *thd, bool wait_for_refresh, TABLE_LIST *tables, bool have_lock = FALSE); void copy_field_from_tmp_record(Field *field,int offset); bool fill_record(THD *thd, Field **field, List<Item> &values, bool ignore_errors); @@ -1056,6 +1171,8 @@ extern ulong volatile manager_status; extern bool volatile manager_thread_in_use, mqh_used; extern pthread_t manager_thread; pthread_handler_t handle_manager(void *arg); +bool mysql_manager_submit(void (*action)()); + /* sql_test.cc */ #ifndef DBUG_OFF @@ -1067,7 +1184,7 @@ void print_plan(JOIN* join, double read_time, double record_count, #endif void mysql_print_status(); /* key.cc */ -int find_ref_key(TABLE *form,Field *field, uint *offset); +int find_ref_key(KEY *key, uint 
key_count, Field *field, uint *key_length); void key_copy(byte *to_key, byte *from_record, KEY *key_info, uint key_length); void key_restore(byte *to_record, byte *from_key, KEY *key_info, uint key_length); @@ -1075,19 +1192,33 @@ bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length); void key_unpack(String *to,TABLE *form,uint index); bool check_if_key_used(TABLE *table, uint idx, List<Item> &fields); int key_cmp(KEY_PART_INFO *key_part, const byte *key, uint key_length); +int key_rec_cmp(void *key_info, byte *a, byte *b); bool init_errmessage(void); +#endif /* MYSQL_SERVER */ void sql_perror(const char *message); -void vprint_msg_to_log(enum loglevel level, const char *format, va_list args); +int vprint_msg_to_log(enum loglevel level, const char *format, va_list args); void sql_print_error(const char *format, ...); void sql_print_warning(const char *format, ...); void sql_print_information(const char *format, ...); +/* type of the log table */ +#define QUERY_LOG_SLOW 1 +#define QUERY_LOG_GENERAL 2 + +int error_log_print(enum loglevel level, const char *format, + va_list args); + +bool slow_log_print(THD *thd, const char *query, uint query_length, + time_t query_start_arg); +bool general_log_print(THD *thd, enum enum_server_command command, + const char *format,...); bool fn_format_relative_to_data_home(my_string to, const char *name, const char *dir, const char *extension); +#ifdef MYSQL_SERVER File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg); @@ -1110,10 +1241,12 @@ uint check_word(TYPELIB *lib, const char *val, const char *end, bool is_keyword(const char *name, uint len); #define MY_DB_OPT_FILE "db.opt" +bool my_database_names_init(void); +void my_database_names_free(void); bool load_db_opt(THD *thd, const char *path, HA_CREATE_INFO *create); -bool my_dbopt_init(void); void my_dbopt_cleanup(void); -void my_dbopt_free(void); +extern int creating_database; // How many database locks are made +extern int creating_table; // How many mysql_create_table() are running /* External variables @@ -1125,7 +1258,7 @@ extern char *mysql_data_home,server_version[SERVER_VERSION_LENGTH], def_ft_boolean_syntax[sizeof(ft_boolean_syntax)]; #define mysql_tmpdir (my_tmpdir(&mysql_tmpdir_list)) extern MY_TMPDIR mysql_tmpdir_list; -extern const char *command_name[]; +extern LEX_STRING command_name[]; extern const char *first_keyword, *my_localhost, *delayed_user, *binary_keyword; extern const char **errmesg; /* Error messages */ extern const char *myisam_recover_options_str; @@ -1139,6 +1272,7 @@ extern Lt_creator lt_creator; extern Ge_creator ge_creator; extern Le_creator le_creator; extern char language[FN_REFLEN], reg_ext[FN_EXTLEN]; +extern uint reg_ext_length; extern char glob_hostname[FN_REFLEN], mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char log_error_file[FN_REFLEN], *opt_tc_log_file; @@ -1156,7 +1290,7 @@ extern ulong delayed_rows_in_use,delayed_insert_errors; extern ulong slave_open_temp_tables; extern ulong query_cache_size, query_cache_min_res_unit; extern ulong slow_launch_threads, slow_launch_time; -extern ulong table_cache_size; +extern ulong table_cache_size, table_def_size; extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong slave_net_timeout, slave_trans_retries; extern uint max_user_connections; @@ -1164,6 +1298,9 @@ extern ulong what_to_log,flush_time; extern ulong query_buff_size, thread_stack; extern ulong binlog_cache_size, 
max_binlog_cache_size, open_files_limit; extern ulong max_binlog_size, max_relay_log_size; +#ifdef HAVE_ROW_BASED_REPLICATION +extern ulong opt_binlog_rows_event_max_size; +#endif extern ulong rpl_recovery_rank, thread_cache_size; extern ulong back_log; extern ulong specialflag, current_pid; @@ -1185,7 +1322,7 @@ extern bool volatile abort_loop, shutdown_in_progress, grant_option; extern bool mysql_proc_table_exists; extern uint volatile thread_count, thread_running, global_read_lock; extern my_bool opt_sql_bin_update, opt_safe_user_create, opt_no_mix_types; -extern my_bool opt_safe_show_db, opt_local_infile; +extern my_bool opt_safe_show_db, opt_local_infile, opt_myisam_use_mmap; extern my_bool opt_slave_compressed_protocol, use_temp_pool; extern my_bool opt_readonly, lower_case_file_system; extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs; @@ -1200,12 +1337,14 @@ extern char *default_tz_name; extern my_bool opt_large_pages; extern uint opt_large_page_size; -extern MYSQL_LOG mysql_log,mysql_slow_log,mysql_bin_log; +extern MYSQL_LOG mysql_bin_log; +extern LOGGER logger; +extern TABLE_LIST general_log, slow_log; extern FILE *bootstrap_file; extern int bootstrap_error; extern FILE *stderror_file; extern pthread_key(MEM_ROOT**,THR_MALLOC); -extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, +extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, LOCK_lock_db, LOCK_thread_count,LOCK_mapped_file,LOCK_user_locks, LOCK_status, LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_generator, LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone, @@ -1215,6 +1354,9 @@ extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, #ifdef HAVE_OPENSSL extern pthread_mutex_t LOCK_des_key_file; #endif +extern pthread_mutex_t LOCK_server_started; +extern pthread_cond_t COND_server_started; +extern int mysqld_server_started; extern rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; extern pthread_cond_t COND_refresh, COND_thread_count, COND_manager; extern pthread_attr_t connection_attrib; @@ -1223,7 +1365,7 @@ extern I_List<NAMED_LIST> key_caches; extern MY_BITMAP temp_pool; extern String my_empty_string; extern const String my_null_string; -extern SHOW_VAR init_vars[],status_vars[], internal_vars[]; +extern SHOW_VAR init_vars[], status_vars[], internal_vars[]; extern struct system_variables global_system_variables; extern struct system_variables max_system_variables; extern struct system_status_var global_status_var; @@ -1233,20 +1375,74 @@ extern const char *opt_date_time_formats[]; extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[]; extern String null_string; -extern HASH open_cache; +extern HASH open_cache, lock_db_cache; extern TABLE *unused_tables; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern const char* any_db; extern struct my_option my_long_options[]; extern const LEX_STRING view_type; /* optional things, have_* variables */ -extern SHOW_COMP_OPTION have_isam, have_innodb, have_berkeley_db; -extern SHOW_COMP_OPTION have_example_db, have_archive_db, have_csv_db; +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern handlerton innobase_hton; +#define have_innodb innobase_hton.state +#else +extern SHOW_COMP_OPTION have_innodb; +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +extern handlerton berkeley_hton; +#define have_berkeley_db berkeley_hton.state +#else +extern SHOW_COMP_OPTION have_berkeley_db; +#endif +#ifdef WITH_EXAMPLE_STORAGE_ENGINE +extern handlerton example_hton; +#define have_example_db 
example_hton.state +#else +extern SHOW_COMP_OPTION have_example_db; +#endif +#ifdef WITH_ARCHIVE_STORAGE_ENGINE +extern handlerton archive_hton; +#define have_archive_db archive_hton.state +#else +extern SHOW_COMP_OPTION have_archive_db; +#endif +#ifdef WITH_CSV_STORAGE_ENGINE +extern handlerton tina_hton; +#define have_csv_db tina_hton.state +#else +extern SHOW_COMP_OPTION have_csv_db; +#endif +#ifdef WITH_FEDERATED_STORAGE_ENGINE +extern handlerton federated_hton; +#define have_federated_db federated_hton.state +#else extern SHOW_COMP_OPTION have_federated_db; +#endif +#ifdef WITH_BLACKHOLE_STORAGE_ENGINE +extern handlerton blackhole_hton; +#define have_blackhole_db blackhole_hton.state +#else extern SHOW_COMP_OPTION have_blackhole_db; +#endif +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE +extern handlerton ndbcluster_hton; +#define have_ndbcluster ndbcluster_hton.state +#else extern SHOW_COMP_OPTION have_ndbcluster; +#endif +#ifdef WITH_PARTITION_STORAGE_ENGINE +extern handlerton partition_hton; +#define have_partition_db partition_hton.state +#else +extern SHOW_COMP_OPTION have_partition_db; +#endif + +extern handlerton myisam_hton; +extern handlerton myisammrg_hton; +extern handlerton heap_hton; + +extern SHOW_COMP_OPTION have_row_based_replication; extern SHOW_COMP_OPTION have_raid, have_openssl, have_symlink; extern SHOW_COMP_OPTION have_query_cache; extern SHOW_COMP_OPTION have_geometry, have_rtree_keys; @@ -1272,7 +1468,9 @@ void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock); void mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock); void mysql_unlock_some_tables(THD *thd, TABLE **table,uint count); void mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table); -void mysql_lock_abort(THD *thd, TABLE *table); +void mysql_lock_abort(THD *thd, TABLE *table, bool upgrade_lock); +void mysql_lock_downgrade_write(THD *thd, TABLE *table, + thr_lock_type new_lock_type); bool mysql_lock_abort_for_thread(THD *thd, TABLE *table); MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b); TABLE_LIST *mysql_lock_have_duplicate(THD *thd, TABLE_LIST *needle, @@ -1300,23 +1498,34 @@ void unlock_table_names(THD *thd, TABLE_LIST *table_list, void unireg_init(ulong options); void unireg_end(void); -bool mysql_create_frm(THD *thd, my_string file_name, +bool mysql_create_frm(THD *thd, const char *file_name, const char *db, const char *table, HA_CREATE_INFO *create_info, List<create_field> &create_field, uint key_count,KEY *key_info,handler *db_type); -int rea_create_table(THD *thd, my_string file_name, - const char *db, const char *table, +int rea_create_table(THD *thd, const char *path, + const char *db, const char *table_name, HA_CREATE_INFO *create_info, - List<create_field> &create_field, - uint key_count,KEY *key_info); + List<create_field> &create_field, + uint key_count,KEY *key_info, + handler *file); int format_number(uint inputflag,uint max_length,my_string pos,uint length, my_string *errpos); -int openfrm(THD *thd, const char *name,const char *alias,uint filestat, - uint prgflag, uint ha_open_flags, TABLE *outparam); + +/* table.cc */ +TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, + uint key_length); +void init_tmp_table_share(TABLE_SHARE *share, const char *key, uint key_length, + const char *table_name, const char *path); +void free_table_share(TABLE_SHARE *share); +int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags); +void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg); +int open_table_from_share(THD *thd, TABLE_SHARE 
*share, const char *alias, + uint db_stat, uint prgflag, uint ha_open_flags, + TABLE *outparam, bool is_create_table); int readfrm(const char *name, const void** data, uint* length); int writefrm(const char* name, const void* data, uint len); -int closefrm(TABLE *table); +int closefrm(TABLE *table, bool free_share); int read_string(File file, gptr *to, uint length); void free_blobs(TABLE *table); int set_zone(int nr,int min_zone,int max_zone); @@ -1334,6 +1543,9 @@ void calc_time_from_sec(TIME *to, long seconds, long microseconds); void make_truncated_value_warning(THD *thd, const char *str_val, uint str_length, timestamp_type time_type, const char *field_name); + +bool date_add_interval(TIME *ltime, interval_type int_type, INTERVAL interval); + extern DATE_TIME_FORMAT *date_time_format_make(timestamp_type format_type, const char *format_str, uint format_length); @@ -1375,8 +1587,8 @@ ulong make_new_entry(File file,uchar *fileinfo,TYPELIB *formnames, const char *newname); ulong next_io_size(ulong pos); void append_unescaped(String *res, const char *pos, uint length); -int create_frm(THD *thd, char *name, const char *db, const char *table, - uint reclength,uchar *fileinfo, +int create_frm(THD *thd, const char *name, const char *db, const char *table, + uint reclength, uchar *fileinfo, HA_CREATE_INFO *create_info, uint keys); void update_create_info_from_table(HA_CREATE_INFO *info, TABLE *form); int rename_file_ext(const char * from,const char * to,const char * ext); @@ -1386,7 +1598,15 @@ bool check_table_name(const char *name, uint length); char *get_field(MEM_ROOT *mem, Field *field); bool get_field(MEM_ROOT *mem, Field *field, class String *res); int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr); - +char *fn_rext(char *name); + +/* Conversion functions */ +uint strconvert(CHARSET_INFO *from_cs, const char *from, + CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors); +uint filename_to_tablename(const char *from, char *to, uint to_length); +uint tablename_to_filename(const char *from, char *to, uint to_length); +uint build_table_filename(char *buff, size_t bufflen, const char *db, + const char *table, const char *ext); /* from hostname.cc */ struct in_addr; my_string ip_to_hostname(struct in_addr *in,uint *errors); @@ -1546,4 +1766,8 @@ inline void kill_delayed_threads(void) {} #define check_stack_overrun(A, B, C) 0 #endif +/* Used by handlers to store things in schema tables */ +bool schema_table_store_record(THD *thd, TABLE *table); + +#endif /* MYSQL_SERVER */ #endif /* MYSQL_CLIENT */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 37a135fa063..4f8944593bc 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -19,28 +19,22 @@ #include <my_dir.h> #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include "stacktrace.h" #include "mysqld_suffix.h" #include "mysys_err.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif +#include "event.h" + #include "ha_myisam.h" -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE #define OPT_INNODB_DEFAULT 1 #else #define OPT_INNODB_DEFAULT 0 #endif #define OPT_BDB_DEFAULT 0 -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE #define OPT_NDBCLUSTER_DEFAULT 0 #if defined(NOT_ENOUGH_TESTED) \ && defined(NDB_SHM_TRANSPORTER) && MYSQL_VERSION_ID >= 50000 @@ -140,6 +134,13 @@ int deny_severity = LOG_WARNING; #define zVOLSTATE_DEACTIVE 
2 #define zVOLSTATE_MAINTENANCE 3 +#undef __event_h__ +#include <../include/event.h> +/* + This #undef exists here because both libc of NetWare and MySQL have + files named event.h which causes compilation errors. +*/ + #include <nks/netware.h> #include <nks/vm.h> #include <library.h> @@ -303,8 +304,16 @@ arg_cmp_func Arg_comparator::comparator_matrix[5][2] = {&Arg_comparator::compare_row, &Arg_comparator::compare_e_row}, {&Arg_comparator::compare_decimal, &Arg_comparator::compare_e_decimal}}; +const char *log_output_names[] = +{ "NONE", "FILE", "TABLE", NullS}; +TYPELIB log_output_typelib= {array_elements(log_output_names)-1,"", + log_output_names, NULL}; + /* static variables */ +/* the default log output is log tables */ +static const char *log_output_str= "TABLE"; +static ulong log_output_options= LOG_TABLE; static bool lower_case_table_names_used= 0; static bool volatile select_thread_in_use, signal_thread_in_use; static bool volatile ready_to_exit; @@ -330,7 +339,7 @@ static I_List<THD> thread_cache; static pthread_cond_t COND_thread_cache, COND_flush_thread_cache; -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE static my_bool opt_sync_bdb_logs; #endif @@ -355,7 +364,68 @@ my_bool opt_safe_user_create = 0, opt_no_mix_types = 0; my_bool opt_show_slave_auth_info, opt_sql_bin_update = 0; my_bool opt_log_slave_updates= 0; my_bool opt_innodb; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE +extern SHOW_VAR innodb_status_variables[]; +extern uint innobase_init_flags, innobase_lock_type; +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_cache_size, innobase_fast_shutdown; +extern ulong innobase_large_page_size; +extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; +extern long innobase_lock_scan_time; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern longlong innobase_log_file_size; +extern long innobase_log_buffer_size; +extern longlong innobase_buffer_pool_size; +extern long innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_use_large_pages, + innobase_use_native_aio, + innobase_file_per_table, innobase_locks_unsafe_for_binlog, + innobase_create_status_file; +extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before + calling innobase_end() if you want + InnoDB to shut down without + flushing the buffer pool: this + is equivalent to a 'crash' */ +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} +#endif +#ifdef WITH_BERKELEY_STORAGE_ENGINE +#ifndef HAVE_U_INT32_T +typedef unsigned int u_int32_t; +#endif +extern const u_int32_t bdb_DB_TXN_NOSYNC, bdb_DB_RECOVER, bdb_DB_PRIVATE, + bdb_DB_DIRECT_DB, bdb_DB_DIRECT_LOG; +extern bool berkeley_shared_data; +extern u_int32_t 
berkeley_init_flags,berkeley_env_flags, berkeley_lock_type, + berkeley_lock_types[]; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +extern long berkeley_lock_scan_time; +extern TYPELIB berkeley_lock_typelib; +#endif + +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE const char *opt_ndbcluster_connectstring= 0; const char *opt_ndb_connectstring= 0; char opt_ndb_constrbuf[1024]; @@ -364,6 +434,17 @@ my_bool opt_ndb_shm, opt_ndb_optimized_node_selection; ulong opt_ndb_cache_check_time; const char *opt_ndb_mgmd; ulong opt_ndb_nodeid; +ulong ndb_extra_logging; +#ifdef HAVE_NDB_BINLOG +ulong ndb_report_thresh_binlog_epoch_slip; +ulong ndb_report_thresh_binlog_mem_usage; +#endif + +extern SHOW_VAR ndb_status_variables[]; +extern const char *ndb_distribution_names[]; +extern TYPELIB ndb_distribution_typelib; +extern const char *opt_ndb_distribution; +extern enum ndb_distribution opt_ndb_distribution_id; #endif my_bool opt_readonly, use_temp_pool, relay_log_purge; my_bool opt_sync_frm, opt_allow_suspicious_udfs; @@ -371,6 +452,7 @@ my_bool opt_secure_auth= 0; my_bool opt_log_slow_admin_statements= 0; my_bool lower_case_file_system= 0; my_bool opt_large_pages= 0; +my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; my_bool opt_old_style_user_limits= 0, trust_function_creators= 0; /* @@ -382,6 +464,16 @@ volatile bool mqh_used = 0; my_bool opt_noacl; my_bool sp_automatic_privileges= 1; +#ifdef HAVE_ROW_BASED_REPLICATION +ulong opt_binlog_rows_event_max_size; +const char *binlog_format_names[]= {"STATEMENT", "ROW", "MIXED", NullS}; +#else +const char *binlog_format_names[]= {"STATEMENT", NullS}; +#endif +TYPELIB binlog_format_typelib= + { array_elements(binlog_format_names)-1,"", + binlog_format_names, NULL }; + #ifdef HAVE_INITGROUPS static bool calling_initgroups= FALSE; /* Used in SIGSEGV handler. 
*/ #endif @@ -393,7 +485,8 @@ uint tc_heuristic_recover= 0; uint volatile thread_count, thread_running; ulonglong thd_startup_options; ulong back_log, connect_timeout, concurrency, server_id; -ulong table_cache_size, thread_stack, what_to_log; +ulong table_cache_size, table_def_size; +ulong thread_stack, what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong slave_net_timeout, slave_trans_retries; @@ -424,7 +517,7 @@ char mysql_real_data_home[FN_REFLEN], language[FN_REFLEN], reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN], *opt_init_file, *opt_tc_log_file, def_ft_boolean_syntax[sizeof(ft_boolean_syntax)]; - +uint reg_ext_length; const key_map key_map_empty(0); key_map key_map_full(0); // Will be initialized later @@ -454,12 +547,10 @@ FILE *bootstrap_file; int bootstrap_error; FILE *stderror_file=0; -I_List<i_string_pair> replicate_rewrite_db; -I_List<i_string> replicate_do_db, replicate_ignore_db; -// allow the user to tell us which db to replicate and which to ignore -I_List<i_string> binlog_do_db, binlog_ignore_db; I_List<THD> threads; I_List<NAMED_LIST> key_caches; +Rpl_filter* rpl_filter; +Rpl_filter* binlog_filter; struct system_variables global_system_variables; struct system_variables max_system_variables; @@ -472,13 +563,10 @@ CHARSET_INFO *system_charset_info, *files_charset_info ; CHARSET_INFO *national_charset_info, *table_alias_charset; CHARSET_INFO *character_set_filesystem; -SHOW_COMP_OPTION have_berkeley_db, have_innodb, have_isam, have_ndbcluster, - have_example_db, have_archive_db, have_csv_db; -SHOW_COMP_OPTION have_federated_db; -SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache; +SHOW_COMP_OPTION have_row_based_replication; +SHOW_COMP_OPTION have_openssl, have_symlink, have_query_cache; SHOW_COMP_OPTION have_geometry, have_rtree_keys; SHOW_COMP_OPTION have_crypt, have_compress; -SHOW_COMP_OPTION have_blackhole_db; /* Thread specific variables */ @@ -498,6 +586,10 @@ rw_lock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; pthread_cond_t COND_refresh,COND_thread_count; pthread_t signal_thread; pthread_attr_t connection_attrib; +pthread_mutex_t LOCK_server_started; +pthread_cond_t COND_server_started; + +int mysqld_server_started= 0; File_parser_dummy_hook file_parser_dummy_hook; @@ -511,6 +603,7 @@ char *opt_relay_logname = 0, *opt_relaylog_index_name=0; my_bool master_ssl; char *master_ssl_key, *master_ssl_cert; char *master_ssl_ca, *master_ssl_capath, *master_ssl_cipher; +char *opt_logname, *opt_slow_logname; /* Static variables */ @@ -518,8 +611,8 @@ static bool kill_in_progress, segfaulted; static my_bool opt_do_pstack, opt_bootstrap, opt_myisam_log; static int cleanup_done; static ulong opt_specialflag, opt_myisam_block_size; -static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; -static char *opt_slow_logname, *opt_tc_heuristic_recover; +static char *opt_update_logname, *opt_binlog_index_name; +static char *opt_tc_heuristic_recover; static char *mysql_home_ptr, *pidfile_name_ptr; static char **defaults_argv; static char *opt_bin_logname; @@ -984,12 +1077,8 @@ pthread_handler_t kill_server_thread(void *arg __attribute__((unused))) extern "C" sig_handler print_signal_warning(int sig) { - if (!DBUG_IN_USE) - { - if (global_system_variables.log_warnings) - sql_print_warning("Got signal %d from thread %d", - sig,my_thread_id()); - } + if (global_system_variables.log_warnings) + sql_print_warning("Got signal %d from thread %d", 
sig,my_thread_id());
 #ifdef DONT_REMEMBER_SIGNAL
   my_sigset(sig,print_signal_warning);  /* int. thread system calls */
 #endif
@@ -1046,8 +1135,13 @@ void clean_up(bool print_message)
   if (cleanup_done++)
     return; /* purecov: inspected */
 
-  mysql_log.cleanup();
-  mysql_slow_log.cleanup();
+  logger.cleanup_base();
+
+  /*
+    Make sure that handlers finish any outstanding work that depends
+    on the binlog.
+  */
+  ha_binlog_end(current_thd);
   mysql_bin_log.cleanup();
 
 #ifdef HAVE_REPLICATION
@@ -1055,32 +1149,34 @@ void clean_up(bool print_message)
   bitmap_free(&slave_error_mask);
 #endif
   my_tz_free();
-  my_dbopt_free();
+  my_database_names_free();
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
   acl_free(1);
   grant_free();
 #endif
   query_cache_destroy();
   table_cache_free();
+  table_def_free();
   hostname_cache_free();
   item_user_lock_free();
   lex_free();          /* Free some memory */
   set_var_free();
   free_charsets();
+  (void) ha_panic(HA_PANIC_CLOSE);  /* close all tables and logs */
 #ifdef HAVE_DLOPEN
   if (!opt_noacl)
+  {
     udf_free();
+  }
 #endif
-  (void) ha_panic(HA_PANIC_CLOSE);  /* close all tables and logs */
+  plugin_free();
   if (tc_log)
     tc_log->close();
   xid_cache_free();
   delete_elements(&key_caches, (void (*)(const char*, gptr)) free_key_cache);
   multi_keycache_free();
+  free_status_vars();
   end_thr_alarm(1);      /* Free allocated memory */
-#ifdef USE_RAID
-  end_raid();
-#endif
   my_free_open_file_info();
   my_free((char*) global_system_variables.date_format,
          MYF(MY_ALLOW_ZERO_PTR));
@@ -1102,12 +1198,9 @@ void clean_up(bool print_message)
   free_max_user_conn();
 #ifdef HAVE_REPLICATION
   end_slave_list();
-  free_list(&replicate_do_db);
-  free_list(&replicate_ignore_db);
-  free_list(&binlog_do_db);
-  free_list(&binlog_ignore_db);
-  free_list(&replicate_rewrite_db);
 #endif
+  delete binlog_filter;
+  delete rpl_filter;
 #ifdef HAVE_OPENSSL
   if (ssl_acceptor_fd)
     my_free((gptr) ssl_acceptor_fd, MYF(MY_ALLOW_ZERO_PTR));
@@ -1133,6 +1226,8 @@ void clean_up(bool print_message)
   /* do the broadcast inside the lock to ensure that my_end() is not called */
   (void) pthread_cond_broadcast(&COND_thread_count);
   (void) pthread_mutex_unlock(&LOCK_thread_count);
+  logger.cleanup_end();
+
   /*
     The following lines may never be executed as the main thread may have
     killed us
@@ -1167,6 +1262,7 @@ static void wait_for_signal_thread_to_end()
 static void clean_up_mutexes()
 {
   (void) pthread_mutex_destroy(&LOCK_mysql_create_db);
+  (void) pthread_mutex_destroy(&LOCK_lock_db);
   (void) pthread_mutex_destroy(&LOCK_Acl);
   (void) rwlock_destroy(&LOCK_grant);
   (void) pthread_mutex_destroy(&LOCK_open);
@@ -1364,7 +1460,7 @@ static void network_init(void)
   uint  waited;
   uint  this_wait;
   uint  retry;
-  DBUG_ENTER("server_init");
+  DBUG_ENTER("network_init");
   LINT_INIT(ret);
 
   set_ports();
@@ -1603,7 +1699,7 @@ void end_thread(THD *thd, bool put_in_cache)
       ! abort_loop && !kill_cached_threads)
   {
     /* Don't kill the thread, just put it in cache for reuse */
-    DBUG_PRINT("info", ("Adding thread to cache"))
+    DBUG_PRINT("info", ("Adding thread to cache"));
     cached_thread_count++;
     while (!abort_loop && ! wake_thread && ! kill_cached_threads)
       (void) pthread_cond_wait(&COND_thread_cache, &LOCK_thread_count);
@@ -1624,13 +1720,13 @@ void end_thread(THD *thd, bool put_in_cache)
     }
   }
 
-  DBUG_PRINT("info", ("sending a broadcast"))
+  DBUG_PRINT("info", ("sending a broadcast"));
 
   /* Tell main we are ready */
   (void) pthread_mutex_unlock(&LOCK_thread_count);
   /* It's safe to broadcast outside a lock (COND... is not deleted here) */
   (void) pthread_cond_broadcast(&COND_thread_count);
-  DBUG_PRINT("info", ("unlocked thread_count mutex"))
+  DBUG_PRINT("info", ("unlocked thread_count mutex"));
 #ifdef ONE_THREAD
   if (!(test_flags & TEST_NO_THREADS))  // For debugging under Linux
 #endif
@@ -2290,6 +2386,9 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
 #ifdef EXTRA_DEBUG
       sql_print_information("Got signal %d to shutdown mysqld",sig);
 #endif
+      /* switch to the old log message processing */
+      logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_FILE:LOG_NONE,
+                          opt_log ? LOG_FILE:LOG_NONE);
       DBUG_PRINT("info",("Got signal: %d  abort_loop: %d",sig,abort_loop));
       if (!abort_loop)
       {
@@ -2317,6 +2416,9 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
                               REFRESH_THREADS | REFRESH_HOSTS),
                              (TABLE_LIST*) 0, &not_used); // Flush logs
       }
+      /* reenable logs after the options were reloaded */
+      logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_TABLE:LOG_NONE,
+                          opt_log ? LOG_TABLE:LOG_NONE);
       break;
 #ifdef USE_ONE_SIGNAL_HAND
     case THR_SERVER_ALARM:
@@ -2482,7 +2584,7 @@ pthread_handler_t handle_shutdown(void *arg)
 
 static const char *load_default_groups[]= {
-#ifdef HAVE_NDBCLUSTER_DB
+#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
 "mysql_cluster",
 #endif
 "mysqld","server", MYSQL_BASE_VERSION, 0, 0};
@@ -2579,15 +2681,34 @@ static int init_common_variables(const char *conf_file_name, int argc,
     global MYSQL_LOGs in their constructors, because then they would be
     inited before MY_INIT(). So we do it here.
   */
-  mysql_log.init_pthread_objects();
-  mysql_slow_log.init_pthread_objects();
   mysql_bin_log.init_pthread_objects();
-
+
   if (gethostname(glob_hostname,sizeof(glob_hostname)-4) < 0)
     strmov(glob_hostname,"mysql");
   strmake(pidfile_name, glob_hostname, sizeof(pidfile_name)-5);
   strmov(fn_ext(pidfile_name),".pid");    // Add proper extension
 
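The next hunk registers the server's own status_vars through the add_status_vars()/remove_status_vars() API declared earlier in this patch. For context, this is roughly how a caller is expected to use that API; the SHOW_VAR field layout, the SHOW_LONG type tag, and the example variable are assumptions based on 5.1-era headers, not code from this patch.

// Editorial sketch — hypothetical component registering status variables.
static ulong my_example_counter= 0;

static SHOW_VAR my_status_vars[]=
{
  {"My_example_counter", (char*) &my_example_counter, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}                       /* terminator */
};

void my_component_init()
{
  /* Entries stay visible in SHOW STATUS until removed again. */
  if (add_status_vars(my_status_vars))
    sql_print_error("Failed to register example status variables");
}

void my_component_deinit()
{
  remove_status_vars(my_status_vars);
}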
+  /*
+    Add server status variables to the dynamic list of
+    status variables that is shown by SHOW STATUS.
+    Later, in plugin_init, plugin_load, and mysql_install_plugin,
+    new entries could be added to that list.
+  */
+  if (add_status_vars(status_vars))
+    return 1; // an error was already reported
+
+  if (plugin_init())
+  {
+    sql_print_error("Failed to init plugins.");
+    return 1;
+  }
+
+  if (ha_register_builtin_plugins())
+  {
+    sql_print_error("Failed to register built-in storage engines.");
+    return 1;
+  }
+
   load_defaults(conf_file_name, groups, &argc, &argv);
   defaults_argv=argv;
   get_options(argc,argv);
@@ -2602,7 +2723,7 @@ static int init_common_variables(const char *conf_file_name, int argc,
   {
     my_use_large_pages= 1;
     my_large_page_size= opt_large_page_size;
-#ifdef HAVE_INNOBASE_DB
+#ifdef WITH_INNOBASE_STORAGE_ENGINE
     innobase_use_large_pages= 1;
     innobase_large_page_size= opt_large_page_size;
 #endif
@@ -2697,7 +2818,7 @@ static int init_common_variables(const char *conf_file_name, int argc,
   if (use_temp_pool && bitmap_init(&temp_pool,0,1024,1))
     return 1;
-  if (my_dbopt_init())
+  if (my_database_names_init())
     return 1;
 
   /*
@@ -2751,8 +2872,9 @@ You should consider changing lower_case_table_names to 1 or 2",
 
 static int init_thread_environment()
 {
   (void) pthread_mutex_init(&LOCK_mysql_create_db,MY_MUTEX_INIT_SLOW);
+  (void) pthread_mutex_init(&LOCK_lock_db,MY_MUTEX_INIT_SLOW);
   (void) pthread_mutex_init(&LOCK_Acl,MY_MUTEX_INIT_SLOW);
-  (void) pthread_mutex_init(&LOCK_open,MY_MUTEX_INIT_FAST);
+  (void) pthread_mutex_init(&LOCK_open, NULL);
   (void) pthread_mutex_init(&LOCK_thread_count,MY_MUTEX_INIT_FAST);
   (void) pthread_mutex_init(&LOCK_mapped_file,MY_MUTEX_INIT_SLOW);
   (void) pthread_mutex_init(&LOCK_status,MY_MUTEX_INIT_FAST);
@@ -2795,6 +2917,8 @@ static int init_thread_environment()
   (void) pthread_mutex_init(&LOCK_rpl_status, MY_MUTEX_INIT_FAST);
   (void) pthread_cond_init(&COND_rpl_status, NULL);
 #endif
+  (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST);
+  (void) pthread_cond_init(&COND_server_started,NULL);
   sp_cache_init();
   /* Parameter for threads created for connections */
   (void) pthread_attr_init(&connection_attrib);
@@ -2912,7 +3036,11 @@ static void init_ssl()
 static int init_server_components()
 {
   DBUG_ENTER("init_server_components");
-  if (table_cache_init() || hostname_cache_init())
+  /*
+    We need to call each of the following functions to ensure that
+    all things are initialized so that unireg_abort() doesn't fail
+  */
+  if (table_cache_init() | table_def_init() | hostname_cache_init())
     unireg_abort(1);
 
   query_cache_result_size_limit(query_cache_limit);
@@ -2925,9 +3053,66 @@ static int init_server_components()
 #ifdef HAVE_REPLICATION
   init_slave_list();
 #endif
-  /* Setup log files */
-  if (opt_log)
-    mysql_log.open_query_log(opt_logname);
+  init_events();
+
+  /* Setup logs */
+
+  /* enable old-fashioned error log */
+  if (opt_error_log)
+  {
+    if (!log_error_file_ptr[0])
+      fn_format(log_error_file, glob_hostname, mysql_data_home, ".err",
+                MY_REPLACE_EXT); /* replace '.<domain>' by '.err', bug#4997 */
+    else
+      fn_format(log_error_file, log_error_file_ptr, mysql_data_home, ".err",
+                MY_UNPACK_FILENAME | MY_SAFE_PATH);
+    if (!log_error_file[0])
+      opt_error_log= 1;        // Too long file name
+    else
+    {
+#ifndef EMBEDDED_LIBRARY
+      if (freopen(log_error_file, "a+", stdout))
+#endif
+        freopen(log_error_file, "a+", stderr);
+    }
+  }
+
+#ifdef WITH_CSV_STORAGE_ENGINE
+  if (opt_bootstrap)
+    log_output_options= LOG_FILE;
+  else
+    logger.init_log_tables();
+
+  if (log_output_options & LOG_NONE)
+  {
+    /*
+      Issue a warning if additional values were specified for
+      log-output along with NONE. This is probably not what the
+      user wanted.
+    */
+    if ((log_output_options & LOG_NONE) && (log_output_options & ~LOG_NONE))
+      sql_print_warning("There were other values specified to "
+                        "log-output besides NONE. Disabling slow "
+                        "and general logs anyway.");
+    logger.set_handlers(LOG_FILE, LOG_NONE, LOG_NONE);
+  }
+  else
+  {
+    /* fall back to the log files if tables are not present */
+    if (have_csv_db == SHOW_OPTION_NO)
+    {
+      sql_print_error("CSV engine is not present, falling back to the "
                      "log files");
+      log_output_options= log_output_options & ~LOG_TABLE | LOG_FILE;
+    }
+
+    logger.set_handlers(LOG_FILE, opt_slow_log ? log_output_options:LOG_NONE,
+                        opt_log ? log_output_options:LOG_NONE);
+  }
+#else
+  logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_FILE:LOG_NONE,
+                      opt_log ? LOG_FILE:LOG_NONE);
+#endif
+
   if (opt_update_log)
   {
     /*
@@ -2984,11 +3169,28 @@ with --log-bin instead.");
   {
     sql_print_warning("You need to use --log-bin to make "
                       "--log-slave-updates work.");
-      unireg_abort(1);
+    unireg_abort(1);
   }
 
-  if (opt_slow_log)
-    mysql_slow_log.open_slow_log(opt_slow_logname);
+  if (!opt_bin_log && (global_system_variables.binlog_format != BINLOG_FORMAT_UNSPEC))
+  {
+    sql_print_warning("You need to use --log-bin to make "
+                      "--binlog-format work.");
+    unireg_abort(1);
+  }
+  if (global_system_variables.binlog_format == BINLOG_FORMAT_UNSPEC)
+  {
+#ifdef HAVE_NDB_BINLOG
+    if (opt_bin_log && have_ndbcluster == SHOW_OPTION_YES)
+      global_system_variables.binlog_format= BINLOG_FORMAT_ROW;
+    else
+#endif
+      global_system_variables.binlog_format= BINLOG_FORMAT_STMT;
+  }
+
+  /* Check that we have not left the format unspecified at this point */
+  DBUG_ASSERT((uint)global_system_variables.binlog_format <=
+              array_elements(binlog_format_names)-1);
 
 #ifdef HAVE_REPLICATION
   if (opt_log_slave_updates && replicate_same_server_id)
@@ -3001,25 +3203,6 @@ server.");
   }
 #endif
 
-  if (opt_error_log)
-  {
-    if (!log_error_file_ptr[0])
-      fn_format(log_error_file, glob_hostname, mysql_data_home, ".err",
-                MY_REPLACE_EXT); /* replace '.<domain>' by '.err', bug#4997 */
-    else
-      fn_format(log_error_file, log_error_file_ptr, mysql_data_home, ".err",
-                MY_UNPACK_FILENAME | MY_SAFE_PATH);
-    if (!log_error_file[0])
-      opt_error_log= 1;        // Too long file name
-    else
-    {
-#ifndef EMBEDDED_LIBRARY
-      if (freopen(log_error_file, "a+", stdout))
-#endif
-        stderror_file= freopen(log_error_file, "a+", stderr);
-    }
-  }
-
   if (opt_bin_log)
   {
     char buf[FN_REFLEN];
@@ -3071,17 +3254,15 @@ server.");
   /*
     Check that the default storage engine is actually available.
   */
-  if (!ha_storage_engine_is_enabled((enum db_type)
-                                    global_system_variables.table_type))
+  if (!ha_storage_engine_is_enabled(global_system_variables.table_type))
   {
     if (!opt_bootstrap)
     {
      sql_print_error("Default storage engine (%s) is not available",
-                      ha_get_storage_engine((enum db_type)
-                                            global_system_variables.table_type));
+                      global_system_variables.table_type->name);
      unireg_abort(1);
    }
-    global_system_variables.table_type= DB_TYPE_MYISAM;
+    global_system_variables.table_type= &myisam_hton;
  }
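The CSV fallback above clears LOG_TABLE and sets LOG_FILE in a single unparenthesized expression, which works only because & binds tighter than |. A standalone editorial demonstration with assumed bit values (the real LOG_* constants are defined elsewhere; only the masking behaviour matters here):

// Editorial sketch — the fallback bit arithmetic, parenthesized for clarity.
#include <cstdio>

enum { TOY_LOG_NONE= 1, TOY_LOG_FILE= 2, TOY_LOG_TABLE= 4 };

int main()
{
  unsigned opts= TOY_LOG_TABLE | TOY_LOG_FILE;   /* e.g. --log-output=TABLE,FILE */

  /* CSV engine missing: drop TABLE, force FILE on. Equivalent to the
     server's "opts & ~LOG_TABLE | LOG_FILE" because & binds tighter than |. */
  opts= (opts & ~TOY_LOG_TABLE) | TOY_LOG_FILE;

  printf("file=%d table=%d\n",
         (opts & TOY_LOG_FILE) != 0,
         (opts & TOY_LOG_TABLE) != 0);           /* prints: file=1 table=0 */
  return 0;
}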
@@ -3112,6 +3293,10 @@ server."); mysql_bin_log.purge_logs_before_date(purge_time); } #endif +#ifdef __NETWARE__ + /* Increasing stacksize of threads on NetWare */ + pthread_attr_setstacksize(&connection_attrib, NW_THD_STACKSIZE); +#endif if (opt_myisam_log) (void) mi_log(1); @@ -3151,7 +3336,7 @@ server."); static void create_maintenance_thread() { if ( -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE (have_berkeley_db == SHOW_OPTION_YES) || #endif (flush_time && flush_time != ~(ulong) 0L)) @@ -3263,9 +3448,22 @@ int win_main(int argc, char **argv) int main(int argc, char **argv) #endif { - DEBUGGER_OFF; + rpl_filter= new Rpl_filter; + binlog_filter= new Rpl_filter; + if (!rpl_filter || !binlog_filter) + { + sql_perror("Could not allocate replication and binlog filters"); + exit(1); + } + MY_INIT(argv[0]); // init my_sys library & pthreads + /* + Perform basic logger initialization. Should be called after + MY_INIT, as it initializes mutexes. Log tables are inited later. + */ + logger.init_base(); + #ifdef _CUSTOMSTARTUPCONFIG_ if (_cust_check_startup()) { @@ -3330,7 +3528,6 @@ int main(int argc, char **argv) #endif #ifdef __NETWARE__ /* Increasing stacksize of threads on NetWare */ - pthread_attr_setstacksize(&connection_attrib, NW_THD_STACKSIZE); #endif @@ -3350,9 +3547,7 @@ int main(int argc, char **argv) */ check_data_home(mysql_real_data_home); if (my_setwd(mysql_real_data_home,MYF(MY_WME))) - { unireg_abort(1); /* purecov: inspected */ - } mysql_data_home= mysql_data_home_buff; mysql_data_home[0]=FN_CURLIB; // all paths are relative from here mysql_data_home[1]=0; @@ -3367,7 +3562,6 @@ int main(int argc, char **argv) set_user(mysqld_user, user_info); } - if (opt_bin_log && !server_id) { server_id= !master_host ? 1 : 2; @@ -3389,7 +3583,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); } if (init_server_components()) - exit(1); + unireg_abort(1); network_init(); @@ -3414,6 +3608,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); */ error_handler_hook= my_message_sql; start_signal_handler(); // Creates pidfile + if (acl_init(opt_noacl) || my_tz_init((THD *)0, default_tz_name, opt_bootstrap)) { @@ -3422,7 +3617,7 @@ we force server id to 2, but this MySQL server will not act as a slave."); #ifndef __NETWARE__ (void) pthread_kill(signal_thread, MYSQL_KILL_SIGNAL); #endif /* __NETWARE__ */ - + if (!opt_bootstrap) (void) my_delete(pidfile_name,MYF(MY_WME)); // Not needed anymore @@ -3433,10 +3628,14 @@ we force server id to 2, but this MySQL server will not act as a slave."); if (!opt_noacl) (void) grant_init(); -#ifdef HAVE_DLOPEN if (!opt_noacl) + { + plugin_load(); +#ifdef HAVE_DLOPEN udf_init(); #endif + } + init_status_vars(); if (opt_bootstrap) /* If running with bootstrap, do not start replication. */ opt_skip_slave_start= 1; /* @@ -3476,6 +3675,10 @@ we force server id to 2, but this MySQL server will not act as a slave."); mysqld_port, MYSQL_COMPILATION_COMMENT); + // Signal threads waiting for server to be started + mysqld_server_started= 1; + pthread_cond_signal(&COND_server_started); + #if defined(__NT__) || defined(HAVE_SMEM) handle_connections_methods(); #else @@ -3523,10 +3726,12 @@ we force server id to 2, but this MySQL server will not act as a slave."); CloseHandle(hEventShutdown); } #endif + clean_up(1); wait_for_signal_thread_to_end(); clean_up_mutexes(); + shutdown_events(); my_end(opt_endinfo ?
MY_CHECK_ERROR | MY_GIVE_INFO : 0); - + exit(0); return(0); /* purecov: deadcode */ } @@ -3557,8 +3762,8 @@ static char *add_quoted_string(char *to, const char *from, char *to_end) uint length= (uint) (to_end-to); if (!strchr(from, ' ')) - return strnmov(to, from, length); - return strxnmov(to, length, "\"", from, "\"", NullS); + return strmake(to, from, length-1); + return strxnmov(to, length-1, "\"", from, "\"", NullS); } @@ -3624,7 +3829,6 @@ default_service_handling(char **argv, int main(int argc, char **argv) { - /* When several instances are running on the same machine, we need to have an unique named hEventShudown through the @@ -4438,7 +4642,8 @@ enum options_mysqld OPT_BDB_HOME, OPT_BDB_LOG, OPT_BDB_TMP, OPT_BDB_SYNC, OPT_BDB_LOCK, OPT_BDB, - OPT_BDB_NO_RECOVER, OPT_BDB_SHARED, + OPT_BDB_NO_RECOVER, OPT_BDB_SHARED, + OPT_BDB_DATA_DIRECT, OPT_BDB_LOG_DIRECT, OPT_MASTER_HOST, OPT_MASTER_USER, OPT_MASTER_PASSWORD, OPT_MASTER_PORT, OPT_MASTER_INFO_FILE, OPT_MASTER_CONNECT_RETRY, @@ -4449,6 +4654,13 @@ enum options_mysqld OPT_SQL_BIN_UPDATE_SAME, OPT_REPLICATE_DO_DB, OPT_REPLICATE_IGNORE_DB, OPT_LOG_SLAVE_UPDATES, OPT_BINLOG_DO_DB, OPT_BINLOG_IGNORE_DB, + OPT_BINLOG_FORMAT, +#ifndef DBUG_OFF + OPT_BINLOG_SHOW_XID, +#endif +#ifdef HAVE_ROW_BASED_REPLICATION + OPT_BINLOG_ROWS_EVENT_MAX_SIZE, +#endif OPT_WANT_CORE, OPT_CONCURRENT_INSERT, OPT_MEMLOCK, OPT_MYISAM_RECOVER, OPT_REPLICATE_REWRITE_DB, OPT_SERVER_ID, @@ -4478,13 +4690,19 @@ enum options_mysqld OPT_NDB_FORCE_SEND, OPT_NDB_AUTOINCREMENT_PREFETCH_SZ, OPT_NDB_SHM, OPT_NDB_OPTIMIZED_NODE_SELECTION, OPT_NDB_CACHE_CHECK_TIME, OPT_NDB_MGMD, OPT_NDB_NODEID, + OPT_NDB_DISTRIBUTION, + OPT_NDB_INDEX_STAT_ENABLE, + OPT_NDB_INDEX_STAT_CACHE_ENTRIES, OPT_NDB_INDEX_STAT_UPDATE_FREQ, + OPT_NDB_EXTRA_LOGGING, + OPT_NDB_REPORT_THRESH_BINLOG_EPOCH_SLIP, + OPT_NDB_REPORT_THRESH_BINLOG_MEM_USAGE, OPT_SKIP_SAFEMALLOC, OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_COMPLETION_TYPE, OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS, OPT_MAX_BINLOG_DUMP_EVENTS, OPT_SPORADIC_BINLOG_DUMP_FAIL, OPT_SAFE_USER_CREATE, OPT_SQL_MODE, OPT_HAVE_NAMED_PIPE, - OPT_DO_PSTACK, OPT_REPORT_HOST, + OPT_DO_PSTACK, OPT_EVENT_EXECUTOR, OPT_REPORT_HOST, OPT_REPORT_USER, OPT_REPORT_PASSWORD, OPT_REPORT_PORT, OPT_SHOW_SLAVE_AUTH_INFO, OPT_SLAVE_LOAD_TMPDIR, OPT_NO_MIX_TYPE, @@ -4513,6 +4731,7 @@ enum options_mysqld OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, + OPT_MYISAM_USE_MMAP, OPT_MYISAM_STATS_METHOD, OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT, OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT, @@ -4524,7 +4743,7 @@ enum options_mysqld OPT_RELAY_LOG_PURGE, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, - OPT_SORT_BUFFER, OPT_TABLE_CACHE, + OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS, @@ -4550,8 +4769,10 @@ enum options_mysqld OPT_INNODB_CONCURRENCY_TICKETS, OPT_INNODB_THREAD_SLEEP_DELAY, OPT_BDB_CACHE_SIZE, + OPT_BDB_CACHE_PARTS, OPT_BDB_LOG_BUFFER_SIZE, OPT_BDB_MAX_LOCK, + OPT_BDB_REGION_SIZE, OPT_ERROR_LOG_FILE, OPT_DEFAULT_WEEK_FORMAT, OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS, @@ -4565,6 +4786,7 @@ enum options_mysqld OPT_ENABLE_SHARED_MEMORY, OPT_SHARED_MEMORY_BASE_NAME, OPT_OLD_PASSWORDS, + 
OPT_OLD_ALTER_TABLE, OPT_EXPIRE_LOGS_DAYS, OPT_GROUP_CONCAT_MAX_LEN, OPT_DEFAULT_COLLATION, @@ -4589,6 +4811,8 @@ enum options_mysqld OPT_OLD_STYLE_USER_LIMITS, OPT_LOG_SLOW_ADMIN_STATEMENTS, OPT_TABLE_LOCK_WAIT_TIMEOUT, + OPT_PLUGIN_DIR, + OPT_LOG_OUTPUT, OPT_PORT_OPEN_TIMEOUT }; @@ -4637,12 +4861,18 @@ struct my_option my_long_options[] = Disable with --skip-bdb (will save memory).", (gptr*) &opt_bdb, (gptr*) &opt_bdb, 0, GET_BOOL, NO_ARG, OPT_BDB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE + {"bdb-data-direct", OPT_BDB_DATA_DIRECT, + "Turn off system buffering of BDB database files to avoid double caching.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-home", OPT_BDB_HOME, "Berkeley home directory.", (gptr*) &berkeley_home, (gptr*) &berkeley_home, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-lock-detect", OPT_BDB_LOCK, "Berkeley lock detect (DEFAULT, OLDEST, RANDOM or YOUNGEST, # sec).", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"bdb-log-direct", OPT_BDB_LOG_DIRECT, + "Turn off system buffering of BDB log files to avoid double caching.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bdb-logdir", OPT_BDB_LOG, "Berkeley DB log file directory.", (gptr*) &berkeley_logdir, (gptr*) &berkeley_logdir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -4658,19 +4888,57 @@ Disable with --skip-bdb (will save memory).", {"bdb-tmpdir", OPT_BDB_TMP, "Berkeley DB tempfile name.", (gptr*) &berkeley_tmpdir, (gptr*) &berkeley_tmpdir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"big-tables", OPT_BIG_TABLES, "Allow big result sets by saving all temporary sets on file (Solves most 'table full' errors).", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.", (gptr*) &my_bind_addr_str, (gptr*) &my_bind_addr_str, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"binlog_format", OPT_BINLOG_FORMAT, +#ifdef HAVE_ROW_BASED_REPLICATION + "Tell the master the form of binary logging to use: either 'row' for " + "row-based binary logging, or 'statement' for statement-based binary " + "logging, or 'mixed'. 'mixed' is statement-based binary logging except " + "for those statements where only row-based is correct: those which " + "involve user-defined functions (i.e. UDFs) or the UUID() function; for " + "those, row-based binary logging is automatically used. " +#ifdef HAVE_NDB_BINLOG + "If ndbcluster is enabled, the default is 'row'." +#endif +#else + "Tell the master the form of binary logging to use: this build " + "supports only statement-based binary logging, so only 'statement' is " + "a legal value." 
+#endif + , 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, {"binlog-do-db", OPT_BINLOG_DO_DB, "Tells the master it should log updates for the specified database, and exclude all others not explicitly mentioned.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"binlog-ignore-db", OPT_BINLOG_IGNORE_DB, "Tells the master that updates to the given database should not be logged to the binary log.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT) + {"binlog-show-xid", OPT_BINLOG_SHOW_XID, + "Option used by mysql-test for debugging and testing: " + "do not display the XID in SHOW BINLOG EVENTS; " + "may be removed in future versions", + (gptr*) &Xid_log_event::show_xid, (gptr*) &Xid_log_event::show_xid, + 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, +#endif +#ifdef HAVE_ROW_BASED_REPLICATION + {"binlog-row-event-max-size", OPT_BINLOG_ROWS_EVENT_MAX_SIZE, + "The maximum size of a row-based binary log event in bytes. Rows will be " + "grouped into events smaller than this size if possible. " + "The value has to be a multiple of 256.", + (gptr*) &opt_binlog_rows_event_max_size, + (gptr*) &opt_binlog_rows_event_max_size, 0, + GET_ULONG, REQUIRED_ARG, + /* def_value */ 1024, /* min_value */ 256, /* max_value */ ULONG_MAX, + /* sub_size */ 0, /* block_size */ 256, + /* app_type */ 0 + }, +#endif {"bootstrap", OPT_BOOTSTRAP, "Used by mysql installation scripts.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"character-set-client-handshake", OPT_CHARACTER_SET_CLIENT_HANDSHAKE, @@ -4765,6 +5033,9 @@ Disable with --skip-bdb (will save memory).", (gptr*) &global_system_variables.engine_condition_pushdown, (gptr*) &global_system_variables.engine_condition_pushdown, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"event-scheduler", OPT_EVENT_EXECUTOR, "Enable/disable the event scheduler.", + (gptr*) &opt_event_executor, (gptr*) &opt_event_executor, 0, GET_BOOL, NO_ARG, + 0/*default*/, 0/*min-value*/, 1/*max-value*/, 0, 0, 0}, {"exit-info", 'T', "Used for debugging; Use at your own risk!", 0, 0, 0, GET_LONG, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"external-locking", OPT_USE_LOCKING, "Use system (external) locking. With this option enabled you can run myisamchk to test (not repair) tables while the MySQL server is running.", @@ -4799,7 +5070,7 @@ Disable with --skip-large-pages.", Disable with --skip-innodb (will save memory).", (gptr*) &opt_innodb, (gptr*) &opt_innodb, 0, GET_BOOL, NO_ARG, OPT_INNODB_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_checksums", OPT_INNODB_CHECKSUMS, "Enable InnoDB checksums validation (enabled by default).
\ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, @@ -4807,7 +5078,7 @@ Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums, {"innodb_data_file_path", OPT_INNODB_DATA_FILE_PATH, "Path to individual files and their sizes.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_data_home_dir", OPT_INNODB_DATA_HOME_DIR, "The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir, (gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, @@ -4835,15 +5106,15 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &innobase_file_per_table, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_flush_log_at_trx_commit", OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, "Set to 0 (write and flush once per second), 1 (write and flush at each commit) or 2 (write at commit, flush once per second).", - (gptr*) &srv_flush_log_at_trx_commit, - (gptr*) &srv_flush_log_at_trx_commit, + (gptr*) &innobase_flush_log_at_trx_commit, + (gptr*) &innobase_flush_log_at_trx_commit, 0, GET_ULONG, OPT_ARG, 1, 0, 2, 0, 0, 0}, {"innodb_flush_method", OPT_INNODB_FLUSH_METHOD, "With which method to flush data.", (gptr*) &innobase_unix_file_flush_method, (gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_locks_unsafe_for_binlog", OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG, - "Force InnoDB not to use next-key locking. Instead use only row-level locking", + "Force InnoDB to not use next-key locking, to use only row-level locking.", (gptr*) &innobase_locks_unsafe_for_binlog, (gptr*) &innobase_locks_unsafe_for_binlog, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_log_arch_dir", OPT_INNODB_LOG_ARCH_DIR, @@ -4878,7 +5149,7 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &global_system_variables.innodb_table_locks, (gptr*) &global_system_variables.innodb_table_locks, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* End HAVE_INNOBASE_DB */ +#endif /* End WITH_INNOBASE_STORAGE_ENGINE */ {"isam", OPT_ISAM, "Obsolete. ISAM storage engine is no longer supported.", (gptr*) &opt_isam, (gptr*) &opt_isam, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -4903,16 +5174,6 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, "File that holds the names for last binary log files.", (gptr*) &opt_binlog_index_name, (gptr*) &opt_binlog_index_name, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - /* - This option starts with "log-bin" to emphasize that it is specific of - binary logging. - */ - {"log-bin-trust-function-creators", OPT_LOG_BIN_TRUST_FUNCTION_CREATORS, - "If equal to 0 (the default), then when --log-bin is used, creation of " - "a stored function is allowed only to users having the SUPER privilege and" - " only if this function may not break binary logging.", - (gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0, - GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #ifndef TO_BE_REMOVED_IN_5_1_OR_6_0 /* In 5.0.6 we introduced the below option, then in 5.0.16 we renamed it to @@ -4925,6 +5186,21 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif + /* + This option starts with "log-bin" to emphasize that it is specific to + binary logging.
+ */ + {"log-bin-trust-function-creators", OPT_LOG_BIN_TRUST_FUNCTION_CREATORS, + "If equal to 0 (the default), then when --log-bin is used, creation of " + "a stored function (or trigger) is allowed only to users having the SUPER privilege " + "and only if this stored function (trigger) may not break binary logging." +#ifdef HAVE_ROW_BASED_REPLICATION + "Note that if ALL connections to this server ALWAYS use row-based binary " + "logging, the security issues do not exist and the binary logging cannot " + "break, so you can safely set this to 1." +#endif + ,(gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"log-error", OPT_ERROR_LOG_FILE, "Error log file.", (gptr*) &log_error_file_ptr, (gptr*) &log_error_file_ptr, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, @@ -4934,6 +5210,13 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, {"log-long-format", '0', "Log some extra information to update log. Please note that this option is deprecated; see --log-short-format option.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef WITH_CSV_STORAGE_ENGINE + {"log-output", OPT_LOG_OUTPUT, + "Syntax: log-output[=value[,value...]], where \"value\" could be TABLE, " + "FILE or NONE.", + (gptr*) &log_output_str, (gptr*) &log_output_str, 0, + GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif {"log-queries-not-using-indexes", OPT_LOG_QUERIES_NOT_USING_INDEXES, "Log queries that are executed without benefit of any index to the slow log if it is open.", (gptr*) &opt_log_queries_not_using_indexes, (gptr*) &opt_log_queries_not_using_indexes, @@ -5049,7 +5332,7 @@ master-ssl", Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_ndbcluster, (gptr*) &opt_ndbcluster, 0, GET_BOOL, NO_ARG, OPT_NDBCLUSTER_DEFAULT, 0, 0, 0, 0, 0}, -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE {"ndb-connectstring", OPT_NDB_CONNECTSTRING, "Connect string for ndbcluster.", (gptr*) &opt_ndb_connectstring, @@ -5070,6 +5353,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, (gptr*) &global_system_variables.ndb_autoincrement_prefetch_sz, 0, GET_ULONG, REQUIRED_ARG, 32, 1, 256, 0, 0, 0}, + {"ndb-distribution", OPT_NDB_DISTRIBUTION, + "Default distribution for new tables in ndb", + (gptr*) &opt_ndb_distribution, + (gptr*) &opt_ndb_distribution, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"ndb-force-send", OPT_NDB_FORCE_SEND, "Force send of buffers to ndb immediately without waiting for " "other threads.", @@ -5081,6 +5369,29 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &global_system_variables.ndb_force_send, (gptr*) &global_system_variables.ndb_force_send, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, + {"ndb-extra-logging", OPT_NDB_EXTRA_LOGGING, + "Turn on more logging in the error log.", + (gptr*) &ndb_extra_logging, + (gptr*) &ndb_extra_logging, + 0, GET_INT, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef HAVE_NDB_BINLOG + {"ndb-report-thresh-binlog-epoch-slip", OPT_NDB_REPORT_THRESH_BINLOG_EPOCH_SLIP, + "Threshold on number of epochs to be behind before reporting binlog status. " + "E.g. 
3 means that if the difference between what epoch has been received " + "from the storage nodes and what has been applied to the binlog is 3 or more, " + "a status message will be sent to the cluster log.", + (gptr*) &ndb_report_thresh_binlog_epoch_slip, + (gptr*) &ndb_report_thresh_binlog_epoch_slip, + 0, GET_ULONG, REQUIRED_ARG, 3, 0, 256, 0, 0, 0}, + {"ndb-report-thresh-binlog-mem-usage", OPT_NDB_REPORT_THRESH_BINLOG_MEM_USAGE, + "Threshold on percentage of free memory before reporting binlog status. E.g. " + "10 means that if amount of available memory for receiving binlog data from " + "the storage nodes goes below 10%, " + "a status message will be sent to the cluster log.", + (gptr*) &ndb_report_thresh_binlog_mem_usage, + (gptr*) &ndb_report_thresh_binlog_mem_usage, + 0, GET_ULONG, REQUIRED_ARG, 10, 0, 100, 0, 0, 0}, +#endif {"ndb-use-exact-count", OPT_NDB_USE_EXACT_COUNT, "Use exact records count during query planning and for fast " "select count(*), disable for faster queries.", @@ -5106,6 +5417,23 @@ Disable with --skip-ndbcluster (will save memory).", "A dedicated thread is created to, at the given milliseconds interval, invalidate the query cache if another MySQL server in the cluster has changed the data in the database.", (gptr*) &opt_ndb_cache_check_time, (gptr*) &opt_ndb_cache_check_time, 0, GET_ULONG, REQUIRED_ARG, 0, 0, LONG_TIMEOUT, 0, 1, 0}, + {"ndb-index-stat-enable", OPT_NDB_INDEX_STAT_ENABLE, + "Use ndb index statistics in query optimization.", + (gptr*) &global_system_variables.ndb_index_stat_enable, + (gptr*) &max_system_variables.ndb_index_stat_enable, + 0, GET_BOOL, OPT_ARG, 1, 0, 1, 0, 0, 0}, + {"ndb-index-stat-cache-entries", OPT_NDB_INDEX_STAT_CACHE_ENTRIES, + "Number of start/end keys to store in statistics memory cache." + " Zero means no cache and forces query of db nodes always.", + (gptr*) &global_system_variables.ndb_index_stat_cache_entries, + (gptr*) &max_system_variables.ndb_index_stat_cache_entries, + 0, GET_ULONG, OPT_ARG, 32, 0, ~0L, 0, 0, 0}, + {"ndb-index-stat-update-freq", OPT_NDB_INDEX_STAT_UPDATE_FREQ, + "How often, in the long run, to query db nodes instead of statistics cache." + " For example 20 means every 20th time.", + (gptr*) &global_system_variables.ndb_index_stat_update_freq, + (gptr*) &max_system_variables.ndb_index_stat_update_freq, + 0, GET_ULONG, OPT_ARG, 20, 0, ~0L, 0, 0, 0}, #endif {"new", 'n', "Use very new possible 'unsafe' functions.", (gptr*) &global_system_variables.new_mode, @@ -5116,6 +5444,11 @@ Disable with --skip-ndbcluster (will save memory).", (gptr*) &opt_no_mix_types, (gptr*) &opt_no_mix_types, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif + {"old-alter-table", OPT_OLD_ALTER_TABLE, + "Use old, non-optimized alter table.", + (gptr*) &global_system_variables.old_alter_table, + (gptr*) &max_system_variables.old_alter_table, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, {"old-passwords", OPT_OLD_PASSWORDS, "Use old password encryption method (needed for 4.0 and older clients).", (gptr*) &global_system_variables.old_passwords, (gptr*) &max_system_variables.old_passwords, 0, GET_BOOL, NO_ARG, @@ -5357,11 +5690,15 @@ log and this option does nothing anymore.", "The number of outstanding connection requests MySQL can have. 
This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 }, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE + { "bdb_cache_parts", OPT_BDB_CACHE_PARTS, + "Number of parts to use for BDB cache.", + (gptr*) &berkeley_cache_parts, (gptr*) &berkeley_cache_parts, 0, GET_ULONG, + REQUIRED_ARG, 1, 1, 1024, 0, 1, 0}, { "bdb_cache_size", OPT_BDB_CACHE_SIZE, "The buffer that is allocated to cache index and rows for BDB tables.", - (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULONG, - REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (long) ~0, 0, IO_SIZE, 0}, + (gptr*) &berkeley_cache_size, (gptr*) &berkeley_cache_size, 0, GET_ULL, + REQUIRED_ARG, KEY_CACHE_SIZE, 20*1024, (ulonglong) ~0, 0, IO_SIZE, 0}, /* QQ: The following should be removed soon! (bdb_max_lock preferred) */ {"bdb_lock_max", OPT_BDB_MAX_LOCK, "Synonym for bdb_max_lock.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, @@ -5374,7 +5711,11 @@ log and this option does nothing anymore.", "The maximum number of locks you can have active on a BDB table.", (gptr*) &berkeley_max_lock, (gptr*) &berkeley_max_lock, 0, GET_ULONG, REQUIRED_ARG, 10000, 0, (long) ~0, 0, 1, 0}, -#endif /* HAVE_BERKELEY_DB */ + {"bdb_region_size", OPT_BDB_REGION_SIZE, + "The size of the underlying logging area of the Berkeley DB environment.", + (gptr*) &berkeley_region_size, (gptr*) &berkeley_region_size, 0, GET_ULONG, + OPT_ARG, 60*1024L, 60*1024L, (long) ~0, 0, 1, 0}, +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"binlog_cache_size", OPT_BINLOG_CACHE_SIZE, "The size of the cache to hold the SQL statements for the binary log during a transaction. If you often use big, multi-statement transactions you can increase this to get more performance.", (gptr*) &binlog_cache_size, (gptr*) &binlog_cache_size, 0, GET_ULONG, @@ -5455,7 +5796,7 @@ log and this option does nothing anymore.", (gptr*) &global_system_variables.group_concat_max_len, (gptr*) &max_system_variables.group_concat_max_len, 0, GET_ULONG, REQUIRED_ARG, 1024, 4, (long) ~0, 0, 1, 0}, -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE {"innodb_additional_mem_pool_size", OPT_INNODB_ADDITIONAL_MEM_POOL_SIZE, "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", (gptr*) &innobase_additional_mem_pool_size, @@ -5525,18 +5866,16 @@ log and this option does nothing anymore.", (gptr*) &srv_n_spin_wait_rounds, 0, GET_LONG, REQUIRED_ARG, 20L, 0L, ~0L, 0, 1L, 0}, {"innodb_thread_concurrency", OPT_INNODB_THREAD_CONCURRENCY, - "Helps in performance tuning in heavily concurrent environments. " - "Sets the maximum number of threads allowed inside InnoDB. Value 0" - " will disable the thread throttling.", + "Helps in performance tuning in heavily concurrent environments.", (gptr*) &srv_thread_concurrency, (gptr*) &srv_thread_concurrency, - 0, GET_LONG, REQUIRED_ARG, 0, 0, 1000, 0, 1, 0}, + 0, GET_LONG, REQUIRED_ARG, 20, 1, 1000, 0, 1, 0}, {"innodb_thread_sleep_delay", OPT_INNODB_THREAD_SLEEP_DELAY, "Time of innodb thread sleeping before joining InnoDB queue (usec). 
Value 0" " disable a sleep", (gptr*) &srv_thread_sleep_delay, (gptr*) &srv_thread_sleep_delay, 0, GET_LONG, REQUIRED_ARG, 10000L, 0L, ~0L, 0, 1L, 0}, -#endif /* HAVE_INNOBASE_DB */ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ {"interactive_timeout", OPT_INTERACTIVE_TIMEOUT, "The number of seconds the server waits for activity on an interactive connection before closing it.", (gptr*) &global_system_variables.net_interactive_timeout, @@ -5706,6 +6045,11 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.myisam_sort_buff_size, (gptr*) &max_system_variables.myisam_sort_buff_size, 0, GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, + {"myisam_use_mmap", OPT_MYISAM_USE_MMAP, + "Use memory mapping for reading and writing MyISAM tables", + (gptr*) &opt_myisam_use_mmap, + (gptr*) &opt_myisam_use_mmap, 0, GET_BOOL, NO_ARG, 0, + 0, 0, 0, 0, 0}, {"myisam_stats_method", OPT_MYISAM_STATS_METHOD, "Specifies how MyISAM index statistics collection code should threat NULLs. " "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " @@ -5746,6 +6090,10 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.optimizer_search_depth, (gptr*) &max_system_variables.optimizer_search_depth, 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, + {"plugin_dir", OPT_PLUGIN_DIR, + "Directory for plugins.", + (gptr*) &opt_plugin_dir_ptr, (gptr*) &opt_plugin_dir_ptr, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE, "The size of the buffer that is allocated when preloading indexes", (gptr*) &global_system_variables.preload_buff_size, @@ -5852,12 +6200,12 @@ The minimum value for this variable is 4096.", (gptr*) &max_system_variables.sortbuff_size, 0, GET_ULONG, REQUIRED_ARG, MAX_SORT_MEMORY, MIN_SORT_MEMORY+MALLOC_OVERHEAD*2, ~0L, MALLOC_OVERHEAD, 1, 0}, -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE {"sync-bdb-logs", OPT_BDB_SYNC, "Synchronously flush Berkeley DB logs. Enabled by default", (gptr*) &opt_sync_bdb_logs, (gptr*) &opt_sync_bdb_logs, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ {"sync-binlog", OPT_SYNC_BINLOG, "Synchronously flush binary log to disk after every #th event. " "Use 0 (default) to disable synchronous flushing.", @@ -5883,13 +6231,21 @@ The minimum value for this variable is 4096.", (gptr*) &global_system_variables.sync_replication_timeout, 0, GET_ULONG, REQUIRED_ARG, 10, 0, ~0L, 0, 1, 0}, #endif /* HAVE_REPLICATION */ - {"table_cache", OPT_TABLE_CACHE, - "The number of open tables for all threads.", (gptr*) &table_cache_size, - (gptr*) &table_cache_size, 0, GET_ULONG, REQUIRED_ARG, 64, 1, 512*1024L, - 0, 1, 0}, - {"table_lock_wait_timeout", OPT_TABLE_LOCK_WAIT_TIMEOUT, "Timeout in " - "seconds to wait for a table level lock before returning an error. 
Used" - " only if the connection has active cursors.", + {"table_cache", OPT_TABLE_OPEN_CACHE, + "Deprecated; use --table_open_cache instead.", + (gptr*) &table_cache_size, (gptr*) &table_cache_size, 0, GET_ULONG, + REQUIRED_ARG, 64, 1, 512*1024L, 0, 1, 0}, + {"table_definition_cache", OPT_TABLE_DEF_CACHE, + "The number of cached table definitions.", + (gptr*) &table_def_size, (gptr*) &table_def_size, + 0, GET_ULONG, REQUIRED_ARG, 128, 1, 512*1024L, 0, 1, 0}, + {"table_open_cache", OPT_TABLE_OPEN_CACHE, + "The number of cached open tables.", + (gptr*) &table_cache_size, (gptr*) &table_cache_size, + 0, GET_ULONG, REQUIRED_ARG, 64, 1, 512*1024L, 0, 1, 0}, + {"table_lock_wait_timeout", OPT_TABLE_LOCK_WAIT_TIMEOUT, + "Timeout in seconds to wait for a table level lock before returning an " + "error. Used only if the connection has active cursors.", (gptr*) &table_lock_wait_timeout, (gptr*) &table_lock_wait_timeout, 0, GET_ULONG, REQUIRED_ARG, 50, 1, 1024 * 1024 * 1024, 0, 1, 0}, {"thread_cache_size", OPT_THREAD_CACHE_SIZE, @@ -5938,8 +6294,338 @@ The minimum value for this variable is 4096.", {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; +static int show_question(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONGLONG; + var->value= (char *)&thd->query_id; + return 0; +} -struct show_var_st status_vars[]= { +static int show_net_compression(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_MY_BOOL; + var->value= (char *)&thd->net.compress; + return 0; +} + +static int show_starttime(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long) (thd->query_start() - start_time); + return 0; +} + +#ifdef HAVE_REPLICATION +static int show_rpl_status(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + var->value= const_cast<char*>(rpl_status_type[(int)rpl_status]); + return 0; +} + +static int show_slave_running(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + pthread_mutex_lock(&LOCK_active_mi); + var->value= const_cast<char*>((active_mi && active_mi->slave_running && + active_mi->rli.slave_running) ? "ON" : "OFF"); + pthread_mutex_unlock(&LOCK_active_mi); + return 0; +} + +static int show_slave_retried_trans(THD *thd, SHOW_VAR *var, char *buff) +{ + /* + TODO: with multimaster, have one such counter per line in + SHOW SLAVE STATUS, and have the sum over all lines here. + */ + pthread_mutex_lock(&LOCK_active_mi); + if (active_mi) + { + var->type= SHOW_LONG; + var->value= buff; + pthread_mutex_lock(&active_mi->rli.data_lock); + *((long *)buff)= (long)active_mi->rli.retried_trans; + pthread_mutex_unlock(&active_mi->rli.data_lock); + } + else + var->type= SHOW_UNDEF; + pthread_mutex_unlock(&LOCK_active_mi); + return 0; +} +#endif /* HAVE_REPLICATION */ + +static int show_open_tables(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)cached_open_tables(); + return 0; +} + +static int show_table_definitions(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)cached_table_definitions(); + return 0; +} + +#ifdef HAVE_OPENSSL +/* Functions relying on CTX */ +static int show_ssl_ctx_sess_accept(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 
0 : + SSL_CTX_sess_accept(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_accept_good(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_accept_good(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_connect_good(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_connect_good(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_accept_renegotiate(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_accept_renegotiate(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_connect_renegotiate(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_connect_renegotiate(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_cb_hits(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_cb_hits(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_hits(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_hits(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_cache_full(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_cache_full(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_misses(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_misses(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_timeouts(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_timeouts(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_number(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_number(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_connect(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_connect(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_sess_get_cache_size(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_sess_get_cache_size(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_get_verify_mode(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 0 : + SSL_CTX_get_verify_mode(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_get_verify_depth(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (!ssl_acceptor_fd ? 
0 : + SSL_CTX_get_verify_depth(ssl_acceptor_fd->ssl_context)); + return 0; +} + +static int show_ssl_ctx_get_session_cache_mode(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + if (!ssl_acceptor_fd) + var->value= const_cast<char*>("NONE"); + else + switch (SSL_CTX_get_session_cache_mode(ssl_acceptor_fd->ssl_context)) + { + case SSL_SESS_CACHE_OFF: + var->value= const_cast<char*>("OFF"); break; + case SSL_SESS_CACHE_CLIENT: + var->value= const_cast<char*>("CLIENT"); break; + case SSL_SESS_CACHE_SERVER: + var->value= const_cast<char*>("SERVER"); break; + case SSL_SESS_CACHE_BOTH: + var->value= const_cast<char*>("BOTH"); break; + case SSL_SESS_CACHE_NO_AUTO_CLEAR: + var->value= const_cast<char*>("NO_AUTO_CLEAR"); break; + case SSL_SESS_CACHE_NO_INTERNAL_LOOKUP: + var->value= const_cast<char*>("NO_INTERNAL_LOOKUP"); break; + default: + var->value= const_cast<char*>("Unknown"); break; + } + return 0; +} + +/* Functions relying on SSL */ +static int show_ssl_get_version(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + var->value= const_cast<char*>(thd->net.vio->ssl_arg ? + SSL_get_version((SSL*) thd->net.vio->ssl_arg) : ""); + return 0; +} + +static int show_ssl_session_reused(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)thd->net.vio->ssl_arg ? + SSL_session_reused((SSL*) thd->net.vio->ssl_arg) : + 0; + return 0; +} + +static int show_ssl_get_default_timeout(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)thd->net.vio->ssl_arg ? + SSL_get_default_timeout((SSL*)thd->net.vio->ssl_arg) : + 0; + return 0; +} + +static int show_ssl_get_verify_mode(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)thd->net.vio->ssl_arg ? + SSL_get_verify_mode((SSL*)thd->net.vio->ssl_arg) : + 0; + return 0; +} + +static int show_ssl_get_verify_depth(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_LONG; + var->value= buff; + *((long *)buff)= (long)thd->net.vio->ssl_arg ? + SSL_get_verify_depth((SSL*)thd->net.vio->ssl_arg) : + 0; + return 0; +} + +static int show_ssl_get_cipher(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + var->value= const_cast<char*>(thd->net.vio->ssl_arg ? 
+ SSL_get_cipher((SSL*) thd->net.vio->ssl_arg) : ""); + return 0; +} + +static int show_ssl_get_cipher_list(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type= SHOW_CHAR; + var->value= buff; + if (thd->net.vio->ssl_arg) + { + int i; + const char *p; + for (i=0 ; (p= SSL_get_cipher_list((SSL*) thd->net.vio->ssl_arg,i)); i++) + { + buff= strmov(buff, p); + *buff++= ':'; + } + if (i) + buff--; + } + *buff=0; + return 0; +} + +#endif /* HAVE_OPENSSL */ + +#ifdef WITH_INNOBASE_STORAGE_ENGINE +int innodb_export_status(void); +static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) +{ + innodb_export_status(); + var->type= SHOW_ARRAY; + var->value= (char *) &innodb_status_variables; + return 0; +} +#endif + +SHOW_VAR status_vars[]= { {"Aborted_clients", (char*) &aborted_threads, SHOW_LONG}, {"Aborted_connects", (char*) &aborted_connects, SHOW_LONG}, {"Binlog_cache_disk_use", (char*) &binlog_cache_disk_use, SHOW_LONG}, @@ -5948,6 +6634,7 @@ struct show_var_st status_vars[]= { {"Bytes_sent", (char*) offsetof(STATUS_VAR, bytes_sent), SHOW_LONG_STATUS}, {"Com_admin_commands", (char*) offsetof(STATUS_VAR, com_other), SHOW_LONG_STATUS}, {"Com_alter_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_DB]), SHOW_LONG_STATUS}, + {"Com_alter_event", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_EVENT]), SHOW_LONG_STATUS}, {"Com_alter_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_TABLE]), SHOW_LONG_STATUS}, {"Com_analyze", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ANALYZE]), SHOW_LONG_STATUS}, {"Com_backup_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_BACKUP_TABLE]), SHOW_LONG_STATUS}, @@ -5958,6 +6645,7 @@ struct show_var_st status_vars[]= { {"Com_checksum", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CHECKSUM]), SHOW_LONG_STATUS}, {"Com_commit", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_COMMIT]), SHOW_LONG_STATUS}, {"Com_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_DB]), SHOW_LONG_STATUS}, + {"Com_create_event", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_EVENT]), SHOW_LONG_STATUS}, {"Com_create_function", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_FUNCTION]), SHOW_LONG_STATUS}, {"Com_create_index", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_INDEX]), SHOW_LONG_STATUS}, {"Com_create_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_TABLE]), SHOW_LONG_STATUS}, @@ -5966,11 +6654,12 @@ struct show_var_st status_vars[]= { {"Com_delete_multi", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DELETE_MULTI]), SHOW_LONG_STATUS}, {"Com_do", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DO]), SHOW_LONG_STATUS}, {"Com_drop_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_DB]), SHOW_LONG_STATUS}, + {"Com_drop_event", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_EVENT]), SHOW_LONG_STATUS}, {"Com_drop_function", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_FUNCTION]), SHOW_LONG_STATUS}, {"Com_drop_index", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_INDEX]), SHOW_LONG_STATUS}, {"Com_drop_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_TABLE]), SHOW_LONG_STATUS}, {"Com_drop_user", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_DROP_USER]), SHOW_LONG_STATUS}, - {"Com_execute_sql", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_EXECUTE]), SHOW_LONG_STATUS}, + {"Com_execute_sql", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_EXECUTE]), 
SHOW_LONG_STATUS}, {"Com_flush", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_FLUSH]), SHOW_LONG_STATUS}, {"Com_grant", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_GRANT]), SHOW_LONG_STATUS}, {"Com_ha_close", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_HA_CLOSE]), SHOW_LONG_STATUS}, @@ -6007,18 +6696,21 @@ struct show_var_st status_vars[]= { {"Com_show_collations", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS}, {"Com_show_column_types", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLUMN_TYPES]), SHOW_LONG_STATUS}, {"Com_show_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS}, + {"Com_show_create_event", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_EVENT]), SHOW_LONG_STATUS}, {"Com_show_create_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE]), SHOW_LONG_STATUS}, {"Com_show_databases", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_DATABASES]), SHOW_LONG_STATUS}, + {"Com_show_engine_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_LOGS]), SHOW_LONG_STATUS}, + {"Com_show_engine_mutex", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_MUTEX]), SHOW_LONG_STATUS}, + {"Com_show_engine_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ENGINE_STATUS]), SHOW_LONG_STATUS}, + {"Com_show_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_EVENTS]), SHOW_LONG_STATUS}, {"Com_show_errors", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_ERRORS]), SHOW_LONG_STATUS}, {"Com_show_fields", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_FIELDS]), SHOW_LONG_STATUS}, {"Com_show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS}, - {"Com_show_innodb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INNODB_STATUS]), SHOW_LONG_STATUS}, {"Com_show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS}, - {"Com_show_logs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_LOGS]), SHOW_LONG_STATUS}, {"Com_show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS}, - {"Com_show_ndb_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NDBCLUSTER_STATUS]), SHOW_LONG_STATUS}, {"Com_show_new_master", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS}, {"Com_show_open_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS}, + {"Com_show_plugins", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PLUGINS]), SHOW_LONG_STATUS}, {"Com_show_privileges", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PRIVILEGES]), SHOW_LONG_STATUS}, {"Com_show_processlist", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_PROCESSLIST]), SHOW_LONG_STATUS}, {"Com_show_slave_hosts", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_HOSTS]), SHOW_LONG_STATUS}, @@ -6047,15 +6739,15 @@ struct show_var_st status_vars[]= { {"Com_xa_recover", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_RECOVER]),SHOW_LONG_STATUS}, {"Com_xa_rollback", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_ROLLBACK]),SHOW_LONG_STATUS}, {"Com_xa_start", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_START]),SHOW_LONG_STATUS}, - {"Compression", (char*) 0, SHOW_NET_COMPRESSION}, - {"Connections", (char*) &thread_id, 
SHOW_LONG_CONST}, + {"Compression", (char*) &show_net_compression, SHOW_FUNC}, + {"Connections", (char*) &thread_id, SHOW_LONG_NOFLUSH}, {"Created_tmp_disk_tables", (char*) offsetof(STATUS_VAR, created_tmp_disk_tables), SHOW_LONG_STATUS}, {"Created_tmp_files", (char*) &my_tmp_file_created, SHOW_LONG}, {"Created_tmp_tables", (char*) offsetof(STATUS_VAR, created_tmp_tables), SHOW_LONG_STATUS}, {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG}, - {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_CONST}, + {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH}, {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG}, - {"Flush_commands", (char*) &refresh_version, SHOW_LONG_CONST}, + {"Flush_commands", (char*) &refresh_version, SHOW_LONG_NOFLUSH}, {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS}, {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS}, {"Handler_discover", (char*) offsetof(STATUS_VAR, ha_discover_count), SHOW_LONG_STATUS}, @@ -6071,46 +6763,51 @@ struct show_var_st status_vars[]= { {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS}, {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS}, {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, -#ifdef HAVE_INNOBASE_DB - {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, -#endif /*HAVE_INNOBASE_DB*/ - {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, - {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, - {"Key_blocks_used", (char*) &dflt_key_cache_var.blocks_used, SHOW_KEY_CACHE_CONST_LONG}, - {"Key_read_requests", (char*) &dflt_key_cache_var.global_cache_r_requests, SHOW_KEY_CACHE_LONGLONG}, - {"Key_reads", (char*) &dflt_key_cache_var.global_cache_read, SHOW_KEY_CACHE_LONGLONG}, - {"Key_write_requests", (char*) &dflt_key_cache_var.global_cache_w_requests, SHOW_KEY_CACHE_LONGLONG}, - {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, SHOW_KEY_CACHE_LONGLONG}, +#ifdef WITH_INNOBASE_STORAGE_ENGINE + {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ + {"Key_blocks_not_flushed", (char*) offsetof(KEY_CACHE, global_blocks_changed), SHOW_KEY_CACHE_LONG}, + {"Key_blocks_unused", (char*) offsetof(KEY_CACHE, blocks_unused), SHOW_KEY_CACHE_LONG}, + {"Key_blocks_used", (char*) offsetof(KEY_CACHE, blocks_used), SHOW_KEY_CACHE_LONG}, + {"Key_read_requests", (char*) offsetof(KEY_CACHE, global_cache_r_requests), SHOW_KEY_CACHE_LONGLONG}, + {"Key_reads", (char*) offsetof(KEY_CACHE, global_cache_read), SHOW_KEY_CACHE_LONGLONG}, + {"Key_write_requests", (char*) offsetof(KEY_CACHE, global_cache_w_requests), SHOW_KEY_CACHE_LONGLONG}, + {"Key_writes", (char*) offsetof(KEY_CACHE, global_cache_write), SHOW_KEY_CACHE_LONGLONG}, {"Last_query_cost", (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, -#ifdef HAVE_NDBCLUSTER_DB - {"Ndb_", (char*) &ndb_status_variables, SHOW_VARS}, -#endif /*HAVE_NDBCLUSTER_DB*/ - {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_CONST}, - {"Open_files", (char*) &my_file_opened, SHOW_LONG_CONST}, - {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_CONST}, - {"Open_tables", (char*) 0, SHOW_OPENTABLES}, +#ifdef 
WITH_NDBCLUSTER_STORAGE_ENGINE + {"Ndb", (char*) &ndb_status_variables, SHOW_ARRAY}, +#endif /* WITH_NDBCLUSTER_STORAGE_ENGINE */ + {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_NOFLUSH}, + {"Open_files", (char*) &my_file_opened, SHOW_LONG_NOFLUSH}, + {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_NOFLUSH}, + {"Open_table_definitions", (char*) &show_table_definitions, SHOW_FUNC}, + {"Open_tables", (char*) &show_open_tables, SHOW_FUNC}, {"Opened_tables", (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS}, #ifdef HAVE_QUERY_CACHE - {"Qcache_free_blocks", (char*) &query_cache.free_memory_blocks, SHOW_LONG_CONST}, - {"Qcache_free_memory", (char*) &query_cache.free_memory, SHOW_LONG_CONST}, + {"Qcache_free_blocks", (char*) &query_cache.free_memory_blocks, SHOW_LONG_NOFLUSH}, + {"Qcache_free_memory", (char*) &query_cache.free_memory, SHOW_LONG_NOFLUSH}, {"Qcache_hits", (char*) &query_cache.hits, SHOW_LONG}, {"Qcache_inserts", (char*) &query_cache.inserts, SHOW_LONG}, {"Qcache_lowmem_prunes", (char*) &query_cache.lowmem_prunes, SHOW_LONG}, {"Qcache_not_cached", (char*) &query_cache.refused, SHOW_LONG}, - {"Qcache_queries_in_cache", (char*) &query_cache.queries_in_cache, SHOW_LONG_CONST}, - {"Qcache_total_blocks", (char*) &query_cache.total_blocks, SHOW_LONG_CONST}, + {"Qcache_queries_in_cache", (char*) &query_cache.queries_in_cache, SHOW_LONG_NOFLUSH}, + {"Qcache_total_blocks", (char*) &query_cache.total_blocks, SHOW_LONG_NOFLUSH}, #endif /*HAVE_QUERY_CACHE*/ - {"Questions", (char*) 0, SHOW_QUESTION}, - {"Rpl_status", (char*) 0, SHOW_RPL_STATUS}, + {"Questions", (char*) &show_question, SHOW_FUNC}, +#ifdef HAVE_REPLICATION + {"Rpl_status", (char*) &show_rpl_status, SHOW_FUNC}, +#endif {"Select_full_join", (char*) offsetof(STATUS_VAR, select_full_join_count), SHOW_LONG_STATUS}, {"Select_full_range_join", (char*) offsetof(STATUS_VAR, select_full_range_join_count), SHOW_LONG_STATUS}, {"Select_range", (char*) offsetof(STATUS_VAR, select_range_count), SHOW_LONG_STATUS}, {"Select_range_check", (char*) offsetof(STATUS_VAR, select_range_check_count), SHOW_LONG_STATUS}, {"Select_scan", (char*) offsetof(STATUS_VAR, select_scan_count), SHOW_LONG_STATUS}, {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_LONG}, - {"Slave_retried_transactions",(char*) 0, SHOW_SLAVE_RETRIED_TRANS}, - {"Slave_running", (char*) 0, SHOW_SLAVE_RUNNING}, +#ifdef HAVE_REPLICATION + {"Slave_retried_transactions",(char*) &show_slave_retried_trans, SHOW_FUNC}, + {"Slave_running", (char*) &show_slave_running, SHOW_FUNC}, +#endif {"Slow_launch_threads", (char*) &slow_launch_threads, SHOW_LONG}, {"Slow_queries", (char*) offsetof(STATUS_VAR, long_query_count), SHOW_LONG_STATUS}, {"Sort_merge_passes", (char*) offsetof(STATUS_VAR, filesort_merge_passes), SHOW_LONG_STATUS}, @@ -6118,29 +6815,29 @@ struct show_var_st status_vars[]= { {"Sort_rows", (char*) offsetof(STATUS_VAR, filesort_rows), SHOW_LONG_STATUS}, {"Sort_scan", (char*) offsetof(STATUS_VAR, filesort_scan_count), SHOW_LONG_STATUS}, #ifdef HAVE_OPENSSL - {"Ssl_accept_renegotiates", (char*) 0, SHOW_SSL_CTX_SESS_ACCEPT_RENEGOTIATE}, - {"Ssl_accepts", (char*) 0, SHOW_SSL_CTX_SESS_ACCEPT}, - {"Ssl_callback_cache_hits", (char*) 0, SHOW_SSL_CTX_SESS_CB_HITS}, - {"Ssl_cipher", (char*) 0, SHOW_SSL_GET_CIPHER}, - {"Ssl_cipher_list", (char*) 0, SHOW_SSL_GET_CIPHER_LIST}, - {"Ssl_client_connects", (char*) 0, SHOW_SSL_CTX_SESS_CONNECT}, - {"Ssl_connect_renegotiates", (char*) 0, SHOW_SSL_CTX_SESS_CONNECT_RENEGOTIATE}, - 
{"Ssl_ctx_verify_depth", (char*) 0, SHOW_SSL_CTX_GET_VERIFY_DEPTH}, - {"Ssl_ctx_verify_mode", (char*) 0, SHOW_SSL_CTX_GET_VERIFY_MODE}, - {"Ssl_default_timeout", (char*) 0, SHOW_SSL_GET_DEFAULT_TIMEOUT}, - {"Ssl_finished_accepts", (char*) 0, SHOW_SSL_CTX_SESS_ACCEPT_GOOD}, - {"Ssl_finished_connects", (char*) 0, SHOW_SSL_CTX_SESS_CONNECT_GOOD}, - {"Ssl_session_cache_hits", (char*) 0, SHOW_SSL_CTX_SESS_HITS}, - {"Ssl_session_cache_misses", (char*) 0, SHOW_SSL_CTX_SESS_MISSES}, - {"Ssl_session_cache_mode", (char*) 0, SHOW_SSL_CTX_GET_SESSION_CACHE_MODE}, - {"Ssl_session_cache_overflows", (char*) 0, SHOW_SSL_CTX_SESS_CACHE_FULL}, - {"Ssl_session_cache_size", (char*) 0, SHOW_SSL_CTX_SESS_GET_CACHE_SIZE}, - {"Ssl_session_cache_timeouts", (char*) 0, SHOW_SSL_CTX_SESS_TIMEOUTS}, - {"Ssl_sessions_reused", (char*) 0, SHOW_SSL_SESSION_REUSED}, - {"Ssl_used_session_cache_entries",(char*) 0, SHOW_SSL_CTX_SESS_NUMBER}, - {"Ssl_verify_depth", (char*) 0, SHOW_SSL_GET_VERIFY_DEPTH}, - {"Ssl_verify_mode", (char*) 0, SHOW_SSL_GET_VERIFY_MODE}, - {"Ssl_version", (char*) 0, SHOW_SSL_GET_VERSION}, + {"Ssl_accept_renegotiates", (char*) &show_ssl_ctx_sess_accept_renegotiate, SHOW_FUNC}, + {"Ssl_accepts", (char*) &show_ssl_ctx_sess_accept, SHOW_FUNC}, + {"Ssl_callback_cache_hits", (char*) &show_ssl_ctx_sess_cb_hits, SHOW_FUNC}, + {"Ssl_cipher", (char*) &show_ssl_get_cipher, SHOW_FUNC}, + {"Ssl_cipher_list", (char*) &show_ssl_get_cipher_list, SHOW_FUNC}, + {"Ssl_client_connects", (char*) &show_ssl_ctx_sess_connect, SHOW_FUNC}, + {"Ssl_connect_renegotiates", (char*) &show_ssl_ctx_sess_connect_renegotiate, SHOW_FUNC}, + {"Ssl_ctx_verify_depth", (char*) &show_ssl_ctx_get_verify_depth, SHOW_FUNC}, + {"Ssl_ctx_verify_mode", (char*) &show_ssl_ctx_get_verify_mode, SHOW_FUNC}, + {"Ssl_default_timeout", (char*) &show_ssl_get_default_timeout, SHOW_FUNC}, + {"Ssl_finished_accepts", (char*) &show_ssl_ctx_sess_accept_good, SHOW_FUNC}, + {"Ssl_finished_connects", (char*) &show_ssl_ctx_sess_connect_good, SHOW_FUNC}, + {"Ssl_session_cache_hits", (char*) &show_ssl_ctx_sess_hits, SHOW_FUNC}, + {"Ssl_session_cache_misses", (char*) &show_ssl_ctx_sess_misses, SHOW_FUNC}, + {"Ssl_session_cache_mode", (char*) &show_ssl_ctx_get_session_cache_mode, SHOW_FUNC}, + {"Ssl_session_cache_overflows", (char*) &show_ssl_ctx_sess_cache_full, SHOW_FUNC}, + {"Ssl_session_cache_size", (char*) &show_ssl_ctx_sess_get_cache_size, SHOW_FUNC}, + {"Ssl_session_cache_timeouts", (char*) &show_ssl_ctx_sess_timeouts, SHOW_FUNC}, + {"Ssl_sessions_reused", (char*) &show_ssl_session_reused, SHOW_FUNC}, + {"Ssl_used_session_cache_entries",(char*) &show_ssl_ctx_sess_number, SHOW_FUNC}, + {"Ssl_verify_depth", (char*) &show_ssl_get_verify_depth, SHOW_FUNC}, + {"Ssl_verify_mode", (char*) &show_ssl_get_verify_mode, SHOW_FUNC}, + {"Ssl_version", (char*) &show_ssl_get_version, SHOW_FUNC}, #endif /* HAVE_OPENSSL */ {"Table_locks_immediate", (char*) &locks_immediate, SHOW_LONG}, {"Table_locks_waited", (char*) &locks_waited, SHOW_LONG}, @@ -6149,11 +6846,11 @@ struct show_var_st status_vars[]= { {"Tc_log_page_size", (char*) &tc_log_page_size, SHOW_LONG}, {"Tc_log_page_waits", (char*) &tc_log_page_waits, SHOW_LONG}, #endif - {"Threads_cached", (char*) &cached_thread_count, SHOW_LONG_CONST}, - {"Threads_connected", (char*) &thread_count, SHOW_INT_CONST}, - {"Threads_created", (char*) &thread_created, SHOW_LONG_CONST}, - {"Threads_running", (char*) &thread_running, SHOW_INT_CONST}, - {"Uptime", (char*) 0, SHOW_STARTTIME}, + {"Threads_cached", (char*) &cached_thread_count, 
SHOW_LONG_NOFLUSH}, + {"Threads_connected", (char*) &thread_count, SHOW_INT}, + {"Threads_created", (char*) &thread_created, SHOW_LONG_NOFLUSH}, + {"Threads_running", (char*) &thread_running, SHOW_INT}, + {"Uptime", (char*) &show_starttime, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} }; @@ -6304,13 +7001,6 @@ static void mysql_init_variables(void) exit(1); multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ - /* Initialize structures that is used when processing options */ - replicate_rewrite_db.empty(); - replicate_do_db.empty(); - replicate_ignore_db.empty(); - binlog_do_db.empty(); - binlog_ignore_db.empty(); - /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); strmake(mysql_real_data_home, get_relative_path(DATADIR), @@ -6337,14 +7027,15 @@ static void mysql_init_variables(void) /* Set default values for some option variables */ - global_system_variables.table_type= DB_TYPE_MYISAM; + global_system_variables.table_type= &myisam_hton; global_system_variables.tx_isolation= ISO_REPEATABLE_READ; global_system_variables.select_limit= (ulonglong) HA_POS_ERROR; max_system_variables.select_limit= (ulonglong) HA_POS_ERROR; global_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; max_system_variables.max_join_size= (ulonglong) HA_POS_ERROR; global_system_variables.old_passwords= 0; - + global_system_variables.old_alter_table= 0; + global_system_variables.binlog_format= BINLOG_FORMAT_UNSPEC; /* Default behavior for 4.1 and 5.0 is to treat NULL values as unequal when collecting index statistics for MyISAM tables. @@ -6357,52 +7048,22 @@ static void mysql_init_variables(void) "d:t:i:o,/tmp/mysqld.trace"); #endif opt_error_log= IF_WIN(1,0); -#ifdef HAVE_BERKELEY_DB - have_berkeley_db= SHOW_OPTION_YES; -#else - have_berkeley_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_INNOBASE_DB - have_innodb=SHOW_OPTION_YES; -#else - have_innodb=SHOW_OPTION_NO; -#endif - have_isam=SHOW_OPTION_NO; -#ifdef HAVE_EXAMPLE_DB - have_example_db= SHOW_OPTION_YES; +#ifdef HAVE_ROW_BASED_REPLICATION + have_row_based_replication= SHOW_OPTION_YES; #else - have_example_db= SHOW_OPTION_NO; + have_row_based_replication= SHOW_OPTION_NO; #endif -#if defined(HAVE_ARCHIVE_DB) - have_archive_db= SHOW_OPTION_YES; -#else - have_archive_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_BLACKHOLE_DB - have_blackhole_db= SHOW_OPTION_YES; -#else - have_blackhole_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_FEDERATED_DB - have_federated_db= SHOW_OPTION_YES; -#else - have_federated_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_CSV_DB - have_csv_db= SHOW_OPTION_YES; -#else - have_csv_db= SHOW_OPTION_NO; -#endif -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE have_ndbcluster=SHOW_OPTION_DISABLED; + global_system_variables.ndb_index_stat_enable=TRUE; + max_system_variables.ndb_index_stat_enable=TRUE; + global_system_variables.ndb_index_stat_cache_entries=32; + max_system_variables.ndb_index_stat_cache_entries=~0L; + global_system_variables.ndb_index_stat_update_freq=20; + max_system_variables.ndb_index_stat_update_freq=~0L; #else have_ndbcluster=SHOW_OPTION_NO; #endif -#ifdef USE_RAID - have_raid=SHOW_OPTION_YES; -#else - have_raid=SHOW_OPTION_NO; -#endif #ifdef HAVE_OPENSSL have_openssl=SHOW_OPTION_YES; #else @@ -6476,7 +7137,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), switch(optid) { case '#': #ifndef DBUG_OFF - DBUG_PUSH(argument ? argument : default_dbug_option); + DBUG_SET(argument ? argument : default_dbug_option); + DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); #endif opt_endinfo=1; /* unireg: memory allocation */ break; @@ -6563,14 +7225,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), } case (int)OPT_REPLICATE_IGNORE_DB: { - i_string *db = new i_string(argument); - replicate_ignore_db.push_back(db); + rpl_filter->add_ignore_db(argument); break; } case (int)OPT_REPLICATE_DO_DB: { - i_string *db = new i_string(argument); - replicate_do_db.push_back(db); + rpl_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_REWRITE_DB: @@ -6603,77 +7263,104 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), exit(1); } - i_string_pair *db_pair = new i_string_pair(key, val); - replicate_rewrite_db.push_back(db_pair); + rpl_filter->add_db_rewrite(key, val); break; } case (int)OPT_BINLOG_IGNORE_DB: { - i_string *db = new i_string(argument); - binlog_ignore_db.push_back(db); + binlog_filter->add_ignore_db(argument); + break; + } + case OPT_BINLOG_FORMAT: + { + int id; + if ((id= find_type(argument, &binlog_format_typelib, 2)) <= 0) + { +#ifdef HAVE_ROW_BASED_REPLICATION + fprintf(stderr, + "Unknown binary log format: '%s' " + "(should be one of '%s', '%s', '%s')\n", + argument, + binlog_format_names[BINLOG_FORMAT_STMT], + binlog_format_names[BINLOG_FORMAT_ROW], + binlog_format_names[BINLOG_FORMAT_MIXED]); +#else + fprintf(stderr, + "Unknown binary log format: '%s' (only legal value is '%s')\n", + argument, binlog_format_names[BINLOG_FORMAT_STMT]); +#endif + exit(1); + } + global_system_variables.binlog_format= id-1; break; } case (int)OPT_BINLOG_DO_DB: { - i_string *db = new i_string(argument); - binlog_do_db.push_back(db); + binlog_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_DO_TABLE: { - if (!do_table_inited) - init_table_rule_hash(&replicate_do_table, &do_table_inited); - if (add_table_rule(&replicate_do_table, argument)) + if (rpl_filter->add_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_DO_TABLE: { - if (!wild_do_table_inited) - init_table_rule_array(&replicate_wild_do_table, - &wild_do_table_inited); - if (add_wild_table_rule(&replicate_wild_do_table, argument)) + if (rpl_filter->add_wild_do_table(argument)) { fprintf(stderr, "Could not add do table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_WILD_IGNORE_TABLE: { - if (!wild_ignore_table_inited) - init_table_rule_array(&replicate_wild_ignore_table, - &wild_ignore_table_inited); - if (add_wild_table_rule(&replicate_wild_ignore_table, argument)) + if (rpl_filter->add_wild_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } case (int)OPT_REPLICATE_IGNORE_TABLE: { - if (!ignore_table_inited) - init_table_rule_hash(&replicate_ignore_table, &ignore_table_inited); - if (add_table_rule(&replicate_ignore_table, argument)) + if (rpl_filter->add_ignore_table(argument)) { fprintf(stderr, "Could not add ignore table rule '%s'!\n", argument); exit(1); } - table_rules_on = 1; break; } #endif /* HAVE_REPLICATION */ case (int) OPT_SLOW_QUERY_LOG: - opt_slow_log=1; + opt_slow_log= 1; break; +#ifdef WITH_CSV_STORAGE_ENGINE + case OPT_LOG_OUTPUT: + { + if (!argument || !argument[0]) + { + log_output_options= LOG_TABLE; + log_output_str= log_output_typelib.type_names[1]; + } + else + { + log_output_str= argument; + if ((log_output_options= + 
find_bit_type(argument, &log_output_typelib)) == ~(ulong) 0) + { + fprintf(stderr, "Unknown option to log-output: %s\n", argument); + exit(1); + } + } + break; + } +#endif case (int) OPT_SKIP_NEW: opt_specialflag|= SPECIAL_NO_NEW_FUNC; delay_key_write_options= (uint) DELAY_KEY_WRITE_NONE; @@ -6778,9 +7465,9 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; case OPT_STORAGE_ENGINE: { - if ((enum db_type)((global_system_variables.table_type= - ha_resolve_by_name(argument, strlen(argument)))) == - DB_TYPE_UNKNOWN) + LEX_STRING name= { argument, strlen(argument) }; + if ((global_system_variables.table_type= + ha_resolve_by_name(current_thd, &name)) == NULL) { fprintf(stderr,"Unknown/unsupported table type: %s\n",argument); exit(1); @@ -6825,19 +7512,25 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), global_system_variables.tx_isolation= (type-1); break; } -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE case OPT_BDB_NOSYNC: /* Deprecated option */ opt_sync_bdb_logs= 0; /* Fall through */ case OPT_BDB_SYNC: if (!opt_sync_bdb_logs) - berkeley_env_flags|= DB_TXN_NOSYNC; + berkeley_env_flags|= bdb_DB_TXN_NOSYNC; else - berkeley_env_flags&= ~DB_TXN_NOSYNC; + berkeley_env_flags&= ~bdb_DB_TXN_NOSYNC; + break; + case OPT_BDB_LOG_DIRECT: + berkeley_env_flags|= bdb_DB_DIRECT_DB; + break; + case OPT_BDB_DATA_DIRECT: + berkeley_env_flags|= bdb_DB_DIRECT_LOG; break; case OPT_BDB_NO_RECOVER: - berkeley_init_flags&= ~(DB_RECOVER); + berkeley_init_flags&= ~(bdb_DB_RECOVER); break; case OPT_BDB_LOCK: { @@ -6861,12 +7554,12 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; } case OPT_BDB_SHARED: - berkeley_init_flags&= ~(DB_PRIVATE); + berkeley_init_flags&= ~(bdb_DB_PRIVATE); berkeley_shared_data= 1; break; -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ case OPT_BDB: -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) have_berkeley_db= SHOW_OPTION_YES; else @@ -6874,14 +7567,14 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), #endif break; case OPT_NDBCLUSTER: -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) have_ndbcluster= SHOW_OPTION_YES; else have_ndbcluster= SHOW_OPTION_DISABLED; #endif break; -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE case OPT_NDB_MGMD: case OPT_NDB_NODEID: { @@ -6905,9 +7598,31 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), opt_ndb_constrbuf[opt_ndb_constrbuf_len]= 0; opt_ndbcluster_connectstring= opt_ndb_constrbuf; break; + case OPT_NDB_DISTRIBUTION: + int id; + if ((id= find_type(argument, &ndb_distribution_typelib, 2)) <= 0) + { + fprintf(stderr, + "Unknown ndb distribution type: '%s' " + "(should be '%s' or '%s')\n", + argument, + ndb_distribution_names[ND_KEYHASH], + ndb_distribution_names[ND_LINHASH]); + exit(1); + } + opt_ndb_distribution_id= (enum ndb_distribution)(id-1); + break; + case OPT_NDB_EXTRA_LOGGING: + if (!argument) + ndb_extra_logging++; + else if (argument == disabled_my_option) + ndb_extra_logging= 0L; + else + ndb_extra_logging= atoi(argument); + break; #endif case OPT_INNODB: -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) have_innodb= SHOW_OPTION_YES; else @@ -6915,15 +7630,15 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), #endif break; case OPT_INNODB_DATA_FILE_PATH: -#ifdef HAVE_INNOBASE_DB +#ifdef 
WITH_INNOBASE_STORAGE_ENGINE innobase_data_file_path= argument; #endif break; -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE case OPT_INNODB_LOG_ARCHIVE: innobase_log_archive= argument ? test(atoi(argument)) : 1; break; -#endif /* HAVE_INNOBASE_DB */ +#endif /* WITH_INNOBASE_STORAGE_ENGINE */ case OPT_MYISAM_RECOVER: { if (!argument || !argument[0]) @@ -7071,19 +7786,19 @@ static void get_options(int argc,char **argv) get_one_option))) exit(ho_error); -#ifndef HAVE_NDBCLUSTER_DB +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE if (opt_ndbcluster) sql_print_warning("this binary does not contain NDBCLUSTER storage engine"); #endif -#ifndef HAVE_INNOBASE_DB +#ifndef WITH_INNOBASE_STORAGE_ENGINE if (opt_innodb) sql_print_warning("this binary does not contain INNODB storage engine"); #endif -#ifndef HAVE_ISAM +#ifndef WITH_ISAM_STORAGE_ENGINE if (opt_isam) sql_print_warning("this binary does not contain ISAM storage engine"); #endif -#ifndef HAVE_BERKELEY_DB +#ifndef WITH_BERKELEY_STORAGE_ENGINE if (opt_bdb) sql_print_warning("this binary does not contain BDB storage engine"); #endif @@ -7156,6 +7871,7 @@ static void get_options(int argc,char **argv) init_global_datetime_format(MYSQL_TIMESTAMP_DATETIME, &global_system_variables.datetime_format)) exit(1); + } @@ -7214,7 +7930,7 @@ fn_format_relative_to_data_home(my_string to, const char *name, dir=tmp_path; } return !fn_format(to, name, dir, extension, - MY_REPLACE_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH); + MY_APPEND_EXT | MY_UNPACK_FILENAME | MY_SAFE_PATH); } @@ -7236,6 +7952,9 @@ static void fix_paths(void) (void) my_load_path(mysql_home,mysql_home,""); // Resolve current dir (void) my_load_path(mysql_real_data_home,mysql_real_data_home,mysql_home); (void) my_load_path(pidfile_name,pidfile_name,mysql_real_data_home); + (void) my_load_path(opt_plugin_dir, opt_plugin_dir_ptr ? 
opt_plugin_dir_ptr : + get_relative_path(LIBDIR), mysql_home); + opt_plugin_dir_ptr= opt_plugin_dir; char *sharedir=get_relative_path(SHAREDIR); if (test_if_hard_path(sharedir)) @@ -7392,6 +8111,73 @@ static void create_pid_file() /***************************************************************************** + Instantiate have_xyx for missing storage engines +*****************************************************************************/ +#undef have_berkeley_db +#undef have_innodb +#undef have_ndbcluster +#undef have_example_db +#undef have_archive_db +#undef have_csv_db +#undef have_federated_db +#undef have_partition_db +#undef have_blackhole_db + +SHOW_COMP_OPTION have_berkeley_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_innodb= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_ndbcluster= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_example_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_archive_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_csv_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_federated_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_partition_db= SHOW_OPTION_NO; +SHOW_COMP_OPTION have_blackhole_db= SHOW_OPTION_NO; + +#ifndef WITH_BERKELEY_STORAGE_ENGINE +bool berkeley_shared_data; +ulong berkeley_max_lock, berkeley_log_buffer_size; +ulonglong berkeley_cache_size; +ulong berkeley_region_size, berkeley_cache_parts; +char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; +#endif + +#ifndef WITH_INNOBASE_STORAGE_ENGINE +uint innobase_flush_log_at_trx_commit; +ulong innobase_fast_shutdown; +long innobase_mirrored_log_groups, innobase_log_files_in_group; +longlong innobase_log_file_size; +long innobase_log_buffer_size; +longlong innobase_buffer_pool_size; +long innobase_additional_mem_pool_size; +long innobase_buffer_pool_awe_mem_mb; +long innobase_file_io_threads, innobase_lock_wait_timeout; +long innobase_force_recovery; +long innobase_open_files; +char *innobase_data_home_dir, *innobase_data_file_path; +char *innobase_log_group_home_dir, *innobase_log_arch_dir; +char *innobase_unix_file_flush_method; +my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +ulong srv_max_buf_pool_modified_pct; +ulong srv_max_purge_lag; +ulong srv_auto_extend_increment; +ulong srv_n_spin_wait_rounds; +ulong srv_n_free_tickets_to_enter; +ulong srv_thread_sleep_delay; +ulong srv_thread_concurrency; +ulong srv_commit_concurrency; +#endif + +#ifndef WITH_NDBCLUSTER_STORAGE_ENGINE +ulong ndb_cache_check_time; +ulong ndb_extra_logging; +#endif + +/***************************************************************************** Instantiate templates *****************************************************************************/ @@ -7405,3 +8191,5 @@ template class I_List<NAMED_LIST>; template class I_List<Statement>; template class I_List_iterator<Statement>; #endif + + diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 04cc0baa0aa..e42be40be81 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -24,16 +24,42 @@ */ /* - Classes in this file are used in the following way: - 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects - is created. #of rows in table and index statistics are ignored at this - step. - 2. Created SEL_TREE and index stats data are used to construct a - TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read - plans may be created. - 3. The least expensive table read plan is used to create a tree of - QUICK_SELECT_I-derived objects which are later used for row retrieval. 
- QUICK_RANGEs are also created in this step. + This file contains: + + RangeAnalysisModule + A module that accepts a condition, index (or partitioning) description, + and builds lists of intervals (in index/partitioning space), such that + all possible records that match the condition are contained within the + intervals. + The entry point for the range analysis module is the get_mm_tree() + function. + + The lists are returned in the form of a complicated structure of + interlinked SEL_TREE/SEL_IMERGE/SEL_ARG objects. + See check_quick_keys, find_used_partitions for examples of how to walk + this structure. + All direct "users" of this module are located within this file, too. + + + PartitionPruningModule + A module that accepts a partitioned table, condition, and finds which + partitions we will need to use in query execution. Search down for + "PartitionPruningModule" for a description. + The module has a single entry point - the prune_partitions() function. + + + Range/index_merge/groupby-minmax optimizer module + A module that accepts a table, condition, and returns + - a QUICK_*_SELECT object that can be used to retrieve rows that match + the specified condition, or a "no records will match the condition" + statement. + + The module entry points are + test_quick_select() + get_quick_select_for_ref() + + + Record retrieval code for range/index_merge/groupby-min-max. + Implementations of QUICK_*_SELECT classes. */ #ifdef USE_PRAGMA_IMPLEMENTATION @@ -286,6 +312,48 @@ public: return parent->left == this ? &parent->left : &parent->right; } SEL_ARG *clone_tree(); + + + /* + Check if this SEL_ARG object represents a single-point interval + + SYNOPSIS + is_singlepoint() + + DESCRIPTION + Check if this SEL_ARG object (not tree) represents a single-point + interval, i.e. if it represents a "keypart = const" or + "keypart IS NULL". + + RETURN + TRUE This SEL_ARG object represents a singlepoint interval + FALSE Otherwise + */ + + bool is_singlepoint() + { + /* + Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field) + flags, and the same for right edge. + */ + if (min_flag || max_flag) + return FALSE; + byte *min_val= (byte *)min_value; + byte *max_val= (byte *)max_value; + + if (maybe_null) + { + /* First byte is a NULL value indicator */ + if (*min_val != *max_val) + return FALSE; + + if (*min_val) + return TRUE; /* This is "x IS NULL" */ + min_val++; + max_val++; + } + return !field->key_cmp(min_val, max_val); + } }; class SEL_IMERGE; @@ -294,6 +362,11 @@ class SEL_TREE :public Sql_alloc { public: + /* + Starting an effort to document this field: + (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => + (type == SEL_TREE::IMPOSSIBLE) + */ enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; SEL_TREE(enum Type type_arg) :type(type_arg) {} SEL_TREE() :type(KEY) @@ -319,25 +392,53 @@ public: /* Note that #records for each key scan is stored in table->quick_rows */ }; +class RANGE_OPT_PARAM +{ +public: + THD *thd; /* Current thread handle */ + TABLE *table; /* Table being analyzed */ + COND *cond; /* Used inside get_mm_tree().
*/ + table_map prev_tables; + table_map read_tables; + table_map current_table; /* Bit of the table being analyzed */ + + /* Array of parts of all keys for which range analysis is performed */ + KEY_PART *key_parts; + KEY_PART *key_parts_end; + MEM_ROOT *mem_root; /* Memory that will be freed when range analysis completes */ + MEM_ROOT *old_root; /* Memory that will last until the query end */ + /* + Number of indexes used in range analysis (In SEL_TREE::keys only first + #keys elements are not empty) + */ + uint keys; + + /* + If true, the index descriptions describe real indexes (and it is ok to + call field->optimize_range(real_keynr[...], ...). + Otherwise index description describes fake indexes. + */ + bool using_real_indexes; + + bool remove_jump_scans; + + /* + used_key_no -> table_key_no translation table. Only makes sense if + using_real_indexes==TRUE + */ + uint real_keynr[MAX_KEY]; +}; -typedef struct st_qsel_param { - THD *thd; - TABLE *table; - KEY_PART *key_parts,*key_parts_end; +class PARAM : public RANGE_OPT_PARAM +{ +public: KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */ - MEM_ROOT *mem_root, *old_root; - table_map prev_tables,read_tables,current_table; uint baseflag, max_key_part, range_count; - uint keys; /* number of keys used in the query */ - - /* used_key_no -> table_key_no translation table */ - uint real_keynr[MAX_KEY]; char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys - COND *cond; uint fields_bitmap_size; MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ @@ -347,9 +448,9 @@ typedef struct st_qsel_param { uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ - /* TRUE if last checked tree->key can be used for ROR-scan */ + /* TRUE if last checked tree->key can be used for ROR-scan */ bool is_ror_scan; -} PARAM; +}; class TABLE_READ_PLAN; class TRP_RANGE; @@ -360,13 +461,13 @@ class TABLE_READ_PLAN; struct st_ror_scan_info; -static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field, +static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); -static SEL_ARG *get_mm_leaf(PARAM *param,COND *cond_func,Field *field, +static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond); static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); @@ -409,8 +510,8 @@ static void print_rowid(byte* val, int len); static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif -static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); -static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); +static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2); static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag); @@ -423,7 +524,7 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b); static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE); static bool 
null_part_in_key(KEY_PART *key_part, const char *key, uint length); -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param); +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); /* @@ -455,9 +556,9 @@ public: trees_next(trees), trees_end(trees + PREALLOCED_TREES) {} - int or_sel_tree(PARAM *param, SEL_TREE *tree); - int or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree); - int or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge); + int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree); + int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree); + int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge); }; @@ -473,7 +574,7 @@ public: -1 - Out of memory. */ -int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) +int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) { if (trees_next == trees_end) { @@ -524,7 +625,7 @@ int SEL_IMERGE::or_sel_tree(PARAM *param, SEL_TREE *tree) -1 An error occurred. */ -int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) +int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree) { for (SEL_TREE** tree = trees; tree != trees_next; @@ -558,7 +659,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree) -1 - An error occurred */ -int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge) +int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge) { for (SEL_TREE** tree= imerge->trees; tree != imerge->trees_next; @@ -604,7 +705,7 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) other Error, both passed lists are unusable */ -int imerge_list_or_list(PARAM *param, +int imerge_list_or_list(RANGE_OPT_PARAM *param, List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) { @@ -624,7 +725,7 @@ int imerge_list_or_list(PARAM *param, other Error */ -int imerge_list_or_tree(PARAM *param, +int imerge_list_or_tree(RANGE_OPT_PARAM *param, List<SEL_IMERGE> *im1, SEL_TREE *tree) { @@ -751,7 +852,7 @@ int QUICK_RANGE_SELECT::init() if (file->inited != handler::NONE) file->ha_index_or_rnd_end(); - DBUG_RETURN(error= file->ha_index_init(index)); + DBUG_RETURN(error= file->ha_index_init(index, 1)); } @@ -776,9 +877,10 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, free_file)); - file->reset(); + file->ha_reset(); file->external_lock(current_thd, F_UNLCK); file->close(); + delete file; } } delete_dynamic(&ranges); /* ranges are allocated in alloc */ @@ -908,13 +1010,14 @@ int QUICK_ROR_INTERSECT_SELECT::init() int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) { handler *save_file= file; + THD *thd; DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan"); if (reuse_handler) { DBUG_PRINT("info", ("Reusing handler %p", file)); if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { DBUG_RETURN(1); @@ -929,11 +1032,12 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) DBUG_RETURN(0); } - THD *thd= current_thd; - if (!(file= get_new_handler(head, thd->mem_root, head->s->db_type))) + thd= head->in_use; + if (!(file= get_new_handler(head->s, thd->mem_root, head->s->db_type))) goto failure; DBUG_PRINT("info", ("Allocated new handler %p", file)); - if (file->ha_open(head->s->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + if (file->ha_open(head, 
head->s->normalized_path.str, head->db_stat, + HA_OPEN_IGNORE_IF_LOCKED)) { /* Caller will free the memory */ goto failure; @@ -942,7 +1046,7 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) goto failure; if (file->extra(HA_EXTRA_KEYREAD) || - file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) || + file->ha_retrieve_all_pk() || init() || reset()) { file->external_lock(thd, F_UNLCK); @@ -954,6 +1058,8 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler) DBUG_RETURN(0); failure: + if (file) + delete file; file= save_file; DBUG_RETURN(1); } @@ -1658,10 +1764,10 @@ public: static int fill_used_fields_bitmap(PARAM *param) { TABLE *table= param->table; - param->fields_bitmap_size= (table->s->fields/8 + 1); - uchar *tmp; + param->fields_bitmap_size= bitmap_buffer_size(table->s->fields+1); + uint32 *tmp; uint pk; - if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + if (!(tmp= (uint32*) alloc_root(param->mem_root,param->fields_bitmap_size)) || bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, FALSE)) return 1; @@ -1780,6 +1886,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.old_root= thd->mem_root; param.needed_reg= &needed_reg; param.imerge_cost_buff_size= 0; + param.using_real_indexes= TRUE; + param.remove_jump_scans= TRUE; thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); @@ -1853,9 +1961,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time= (double) HA_POS_ERROR; goto free_mem; } - if (tree->type != SEL_TREE::KEY && - tree->type != SEL_TREE::KEY_SMALLER) - goto free_mem; + /* + If the tree can't be used for range scans, proceed anyway, as we + can construct a group-min-max quick select + */ + if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) + tree= NULL; } } @@ -1970,6 +2081,1088 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_RETURN(records ? test(quick) : -1); } +/**************************************************************************** + * Partition pruning module + ****************************************************************************/ +#ifdef WITH_PARTITION_STORAGE_ENGINE + +/* + PartitionPruningModule + + This part of the code does partition pruning. Partition pruning solves the + following problem: given a query over partitioned tables, find partitions + that we will not need to access (i.e. partitions that we can assume to be + empty) when executing the query. + The set of partitions to prune doesn't depend on which query execution + plan will be used to execute the query. + + HOW IT WORKS + + Partition pruning module makes use of RangeAnalysisModule. The following + examples show how the problem of partition pruning can be reduced to the + range analysis problem: + + EXAMPLE 1 + Consider a query: + + SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z' + + where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent + way to find the used (i.e. not pruned away) partitions is as follows: + + 1. analyze the WHERE clause and extract the list of intervals over t1.a + for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)} + + 2. for each interval I + { + find partitions that have non-empty intersection with I; + mark them as used; + } + + EXAMPLE 2 + Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then + we need to: + + 1. 
Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b). + The list of intervals we'll obtain will look like this: + ((t1.a, t1.b) = (1,'foo')), + ((t1.a, t1.b) = (2,'bar')), + ((t1,a, t1.b) > (10,'zz')) + + 2. for each interval I + { + if (the interval has form "(t1.a, t1.b) = (const1, const2)" ) + { + calculate HASH(part_func(t1.a, t1.b)); + find which partition has records with this hash value and mark + it as used; + } + else + { + mark all partitions as used; + break; + } + } + + For both examples the step #1 is exactly what RangeAnalysisModule could + be used to do, if it was provided with appropriate index description + (array of KEY_PART structures). + In example #1, we need to provide it with description of index(t1.a), + in example #2, we need to provide it with description of index(t1.a, t1.b). + + These index descriptions are further called "partitioning index + descriptions". Note that it doesn't matter if such indexes really exist, + as range analysis module only uses the description. + + Putting it all together, partitioning module works as follows: + + prune_partitions() { + call create_partition_index_description(); + + call get_mm_tree(); // invoke the RangeAnalysisModule + + // analyze the obtained interval list and get used partitions + call find_used_partitions(); + } + +*/ + +struct st_part_prune_param; +struct st_part_opt_info; + +typedef void (*mark_full_part_func)(partition_info*, uint32); + +/* + Partition pruning operation context +*/ +typedef struct st_part_prune_param +{ + RANGE_OPT_PARAM range_param; /* Range analyzer parameters */ + + /*************************************************************** + Following fields are filled in based solely on partitioning + definition and not modified after that: + **************************************************************/ + partition_info *part_info; /* Copy of table->part_info */ + /* Function to get partition id from partitioning fields only */ + get_part_id_func get_top_partition_id_func; + /* Function to mark a partition as used (w/all subpartitions if they exist)*/ + mark_full_part_func mark_full_partition_used; + + /* Partitioning 'index' description, array of key parts */ + KEY_PART *key; + + /* + Number of fields in partitioning 'index' definition created for + partitioning (0 if partitioning 'index' doesn't include partitioning + fields) + */ + uint part_fields; + uint subpart_fields; /* Same as above for subpartitioning */ + + /* + Number of the last partitioning field keypart in the index, or -1 if + partitioning index definition doesn't include partitioning fields. 
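For example (editor's illustration, not from the original source): for a
    table using PARTITION BY HASH(a) SUBPARTITION BY HASH(b, c), the
    partitioning 'index' is (a, b, c), so part_fields == 1,
    subpart_fields == 2, last_part_partno == 0 and last_subpart_partno == 2.
    If the PARTITION BY fields had an unsupported type, part_fields would
    be 0 and last_part_partno would be -1 (see
    create_partition_index_description() below).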
+ */ + int last_part_partno; + int last_subpart_partno; /* Same as above for subpartitioning */ + + /* + is_part_keypart[i] == test(keypart #i in partitioning index is a member + used in partitioning) + Used to maintain current values of cur_part_fields and cur_subpart_fields + */ + my_bool *is_part_keypart; + /* Same as above for subpartitioning */ + my_bool *is_subpart_keypart; + + /*************************************************************** + Following fields form find_used_partitions() recursion context: + **************************************************************/ + SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */ + SEL_ARG **arg_stack_end; /* Top of the stack */ + /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */ + uint cur_part_fields; + /* Same as cur_part_fields, but for subpartitioning */ + uint cur_subpart_fields; + + /* Iterator to be used to obtain the "current" set of used partitions */ + PARTITION_ITERATOR part_iter; + + /* Initialized bitmap of no_subparts size */ + MY_BITMAP subparts_bitmap; +} PART_PRUNE_PARAM; + +static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par); +static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); +static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, + SEL_IMERGE *imerge); +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List<SEL_IMERGE> &merges); +static void mark_all_partitions_as_used(partition_info *part_info); +static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, + uint32 num); + +#ifndef DBUG_OFF +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end); +static void dbug_print_field(Field *field); +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part); +static void dbug_print_singlepoint_range(SEL_ARG **start, uint num); +#endif + + +/* + Perform partition pruning for a given table and condition. + + SYNOPSIS + prune_partitions() + thd Thread handle + table Table to perform partition pruning for + pprune_cond Condition to use for partition pruning + + DESCRIPTION + This function assumes that all partitions are marked as unused when it + is invoked. The function analyzes the condition, finds partitions that + need to be used to retrieve the records that match the condition, and + marks them as used by setting the appropriate bit in + part_info->used_partitions. In the worst case all partitions are marked + as used. + + NOTE + This function returns promptly if called for a non-partitioned table. + + RETURN + TRUE We've inferred that no partitions need to be used (i.e.
no table + records will satisfy pprune_cond) + FALSE Otherwise +*/ + +bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) +{ + bool retval= FALSE; + partition_info *part_info = table->part_info; + DBUG_ENTER("prune_partitions"); + + if (!part_info) + DBUG_RETURN(FALSE); /* not a partitioned table */ + + if (!pprune_cond) + { + mark_all_partitions_as_used(part_info); + DBUG_RETURN(FALSE); + } + + PART_PRUNE_PARAM prune_param; + MEM_ROOT alloc; + RANGE_OPT_PARAM *range_par= &prune_param.range_param; + + prune_param.part_info= part_info; + + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + range_par->mem_root= &alloc; + range_par->old_root= thd->mem_root; + + if (create_partition_index_description(&prune_param)) + { + mark_all_partitions_as_used(part_info); + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(FALSE); + } + + range_par->thd= thd; + range_par->table= table; + /* range_par->cond doesn't need initialization */ + range_par->prev_tables= range_par->read_tables= 0; + range_par->current_table= table->map; + + range_par->keys= 1; // one index + range_par->using_real_indexes= FALSE; + range_par->remove_jump_scans= FALSE; + range_par->real_keynr[0]= 0; + + thd->no_errors=1; // Don't warn about NULL + thd->mem_root=&alloc; + + bitmap_clear_all(&part_info->used_partitions); + + prune_param.key= prune_param.range_param.key_parts; + SEL_TREE *tree; + SEL_ARG *arg; + int res; + + tree= get_mm_tree(range_par, pprune_cond); + if (!tree) + goto all_used; + + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + retval= TRUE; + goto end; + } + + if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) + goto all_used; + + if (tree->merges.is_empty()) + { + /* Range analysis has produced a single list of intervals. */ + prune_param.arg_stack_end= prune_param.arg_stack; + prune_param.cur_part_fields= 0; + prune_param.cur_subpart_fields= 0; + init_all_partitions_iterator(part_info, &prune_param.part_iter); + if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param, + tree->keys[0])))) + goto all_used; + } + else + { + if (tree->merges.elements == 1) + { + /* + Range analysis has produced a "merge" of several intervals lists, a + SEL_TREE that represents an expression in form + sel_imerge = (tree1 OR tree2 OR ... OR treeN) + that cannot be reduced to one tree. This can only happen when + partitioning index has several keyparts and the condition is OR of + conditions that refer to different key parts. For example, we'll get + here for "partitioning_field=const1 OR subpartitioning_field=const2" + */ + if (-1 == (res= find_used_partitions_imerge(&prune_param, + tree->merges.head()))) + goto all_used; + } + else + { + /* + Range analysis has produced a list of several imerges, i.e. a + structure that represents a condition in form + imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN) + This is produced for complicated WHERE clauses that range analyzer + can't really analyze properly. 
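As an editor's illustration (hypothetical condition, following the
      pattern given above): with a table partitioned on field a and
      subpartitioned on field b, a WHERE clause such as

        (a = 1 OR b = 2) AND (a = 3 OR b = 4)

      can be represented neither as one interval list nor as a single
      imerge; each parenthesized disjunction becomes one SEL_IMERGE, and
      the list represents their conjunction.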
+ */ + if (-1 == (res= find_used_partitions_imerge_list(&prune_param, + tree->merges))) + goto all_used; + } + } + + /* + res == 0 => no used partitions => retval=TRUE + res == 1 => some used partitions => retval=FALSE + res == -1 - we jump over this line to all_used: + */ + retval= test(!res); + goto end; + +all_used: + retval= FALSE; // some partitions are used + mark_all_partitions_as_used(prune_param.part_info); +end: + thd->no_errors=0; + thd->mem_root= range_par->old_root; + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(retval); +} + + +/* + Store field key image to table record + + SYNOPSIS + store_key_image_to_rec() + field Field which key image should be stored + ptr Field value in key format + len Length of the value, in bytes + + DESCRIPTION + Copy the field value from its key image to the table record. The source + is the value in key image format, occupying len bytes in the buffer + pointed to by ptr. The destination is the table record, in "field value + in table record" format. +*/ + +void store_key_image_to_rec(Field *field, char *ptr, uint len) +{ + /* Do the same as print_key() does */ + if (field->real_maybe_null()) + { + if (*ptr) + { + field->set_null(); + return; + } + ptr++; + } + field->set_key_image(ptr, len); +} + + +/* + For SEL_ARG* array, store sel_arg->min values into table record buffer + + SYNOPSIS + store_selargs_to_rec() + ppar Partition pruning context + start Array of SEL_ARG* for which the minimum values should be stored + num Number of elements in the array + + DESCRIPTION + For each SEL_ARG* interval in the specified array, store the left edge + field value (sel_arg->min, key image format) into the table record. +*/ + +static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start, + int num) +{ + KEY_PART *parts= ppar->range_param.key_parts; + for (SEL_ARG **end= start + num; start != end; start++) + { + SEL_ARG *sel_arg= (*start); + store_key_image_to_rec(sel_arg->field, sel_arg->min_value, + parts[sel_arg->part].length); + } +} + + +/* Mark a partition as used in the case when there are no subpartitions */ +static void mark_full_partition_used_no_parts(partition_info* part_info, + uint32 part_id) +{ + DBUG_ENTER("mark_full_partition_used_no_parts"); + DBUG_PRINT("enter", ("Mark partition %u as used", part_id)); + bitmap_set_bit(&part_info->used_partitions, part_id); + DBUG_VOID_RETURN; +} + + +/* Mark a partition as used in the case when there are subpartitions */ +static void mark_full_partition_used_with_parts(partition_info *part_info, + uint32 part_id) +{ + uint32 start= part_id * part_info->no_subparts; + uint32 end= start + part_info->no_subparts; + DBUG_ENTER("mark_full_partition_used_with_parts"); + + for (; start != end; start++) + { + DBUG_PRINT("info", ("1:Mark subpartition %u as used", start)); + bitmap_set_bit(&part_info->used_partitions, start); + } + DBUG_VOID_RETURN; +} + +/* + Find the set of used partitions for List<SEL_IMERGE> + SYNOPSIS + find_used_partitions_imerge_list + ppar Partition pruning context. + merges List of SEL_IMERGE structures to perform pruning for. + + DESCRIPTION + List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". + The set of used partitions is the intersection of the used-partition + sets for each imerge_{i}. + We accumulate this intersection in a separate bitmap.
+ + RETURN + See find_used_partitions() +*/ + +static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, + List<SEL_IMERGE> &merges) +{ + MY_BITMAP all_merges; + uint bitmap_bytes; + uint32 *bitmap_buf; + uint n_bits= ppar->part_info->used_partitions.n_bits; + bitmap_bytes= bitmap_buffer_size(n_bits); + if (!(bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root, + bitmap_bytes))) + { + /* + Fallback, process just the first SEL_IMERGE. This can leave us with more + partitions marked as used than actually needed. + */ + return find_used_partitions_imerge(ppar, merges.head()); + } + bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE); + bitmap_set_prefix(&all_merges, n_bits); + + List_iterator<SEL_IMERGE> it(merges); + SEL_IMERGE *imerge; + while ((imerge=it++)) + { + int res= find_used_partitions_imerge(ppar, imerge); + if (!res) + { + /* no used partitions on one ANDed imerge => no used partitions at all */ + return 0; + } + + if (res != -1) + bitmap_intersect(&all_merges, &ppar->part_info->used_partitions); + + if (bitmap_is_clear_all(&all_merges)) + return 0; + + bitmap_clear_all(&ppar->part_info->used_partitions); + } + memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap, + bitmap_bytes); + return 1; +} + + +/* + Find the set of used partitions for SEL_IMERGE structure + SYNOPSIS + find_used_partitions_imerge() + ppar Partition pruning context. + imerge The SEL_IMERGE to perform pruning for. + + DESCRIPTION + SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is + trivial - just mark the used partitions for each tree and bail out early + if for some tree_{i} all partitions are used. + + RETURN + See find_used_partitions(). +*/ + +static +int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) +{ + int res= 0; + for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) + { + ppar->arg_stack_end= ppar->arg_stack; + ppar->cur_part_fields= 0; + ppar->cur_subpart_fields= 0; + init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); + if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0]))) + return -1; + } + return res; +} + + +/* + Collect partitioning ranges for the SEL_ARG tree and mark partitions as used + + SYNOPSIS + find_used_partitions() + ppar Partition pruning context. + key_tree SEL_ARG range tree to perform pruning for + + DESCRIPTION + This function + * recursively walks the SEL_ARG* tree collecting partitioning "intervals" + * finds the partitions one needs to use to get rows in these intervals + * marks these partitions as used. + The next section describes the process in greater detail. + + IMPLEMENTATION + TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR + We can find out which [sub]partitions to use if we obtain restrictions on + [sub]partitioning fields in the following form: + 1. "partition_field1=const1 AND ... AND partition_fieldN=constN" + 1.1 Same as (1) but for subpartition fields + + If partitioning supports interval analysis (i.e. partitioning is a + function of a single table field, and partition_info:: + get_part_iter_for_interval != NULL), then we can also use a condition in + this form: + 2. "const1 <=? partition_field <=? const2" + 2.1 Same as (2) but for subpartition_field + + INFERRING THE RESTRICTIONS FROM SEL_ARG TREE + + Below is an example of what a SEL_ARG tree may represent: + + (start) + | $ + | Partitioning keyparts $ subpartitioning keyparts + | $ + | ... ...
$ + | | | $ + | +---------+ +---------+ $ +-----------+ +-----------+ + \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5| + +---------+ +---------+ $ +-----------+ +-----------+ + | $ | | + | $ | +-----------+ + | $ | | subpar2=c6| + | $ | +-----------+ + | $ | + | $ +-----------+ +-----------+ + | $ | subpar1=c4|--| subpar2=c8| + | $ +-----------+ +-----------+ + | $ + | $ + +---------+ $ +------------+ +------------+ + | par1=c2 |------------------| subpar1=c10|--| subpar2=c12| + +---------+ $ +------------+ +------------+ + | $ + ... $ + + The up-down connections are connections via SEL_ARG::left and + SEL_ARG::right. A horizontal connection to the right is the + SEL_ARG::next_key_part connection. + + find_used_partitions() traverses the entire tree via recursion on + * SEL_ARG::next_key_part (from left to right on the picture) + * SEL_ARG::left|right (up/down on the pic). Left-right recursion is + performed for each depth level. + + Recursion descent on SEL_ARG::next_key_part is used to accumulate (in + ppar->arg_stack) constraints on partitioning and subpartitioning fields. + For the example in the above picture, one of the stack states is: + in find_used_partitions(key_tree = "subpar2=c5") (***) + in find_used_partitions(key_tree = "subpar1=c3") + in find_used_partitions(key_tree = "par2=c2") (**) + in find_used_partitions(key_tree = "par1=c1") + in prune_partitions(...) + We apply partitioning limits as soon as possible, e.g. when we reach the + depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2", + and save them in ppar->part_iter. + When we reach the depth (***), we find which subpartition(s) correspond to + "subpar1=c3 AND subpar2=c5", and then mark the appropriate subpartitions + in the appropriate partitions as used. + + It is possible that constraints on some partitioning fields are missing. + For the above example, consider this stack state: + in find_used_partitions(key_tree = "subpar2=c12") (***) + in find_used_partitions(key_tree = "subpar1=c10") + in find_used_partitions(key_tree = "par1=c2") + in prune_partitions(...) + Here we don't have constraints for all partitioning fields. Since we've + never set ppar->part_iter to contain the used set of partitions, we use + its default "all partitions" value. We get the subpartition id for + "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every + partition. + + The inverse is also possible: we may get constraints on partitioning + fields, but not constraints on subpartitioning fields. In that case, + calls to find_used_partitions() with depth below (**) will return -1, + and we will mark the entire partition as used. + + TODO + Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop + + RETURN + 1 OK, one or more [sub]partitions are marked as used. + 0 The passed condition doesn't match any partitions + -1 Couldn't infer any partition pruning "intervals" from the passed + SEL_ARG* tree (which means that all partitions should be marked as + used). Marking the partitions as used is the responsibility of the + caller.
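To make the traversal order concrete, here is an editor's sketch with a
    toy node type (not the real SEL_ARG; no intervals, only single-point
    values, and NULL instead of the null_element sentinel):

      #include <vector>

      struct Node
      {
        int keypart, value;
        Node *left, *right, *next_key_part;  // NULL where absent
      };

      static void walk(Node *n, std::vector<Node*> *stack)
      {
        if (!n) return;
        walk(n->left, stack);                // other values of this keypart
        stack->push_back(n);                 // push "keypartN = value"
        if (n->next_key_part)
          walk(n->next_key_part, stack);     // constrain the next keypart
        else
        {
          // The stack now holds one full conjunction; the real code
          // computes partition/subpartition ids from it at this point.
        }
        stack->pop_back();
        walk(n->right, stack);               // other values of this keypart
      }

    The real function additionally applies partitioning limits as soon as
    all partitioning (or subpartitioning) fields are covered, as described
    at depths (**) and (***) above.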
+*/ + +static +int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) +{ + int res, left_res=0, right_res=0; + int partno= (int)key_tree->part; + bool pushed= FALSE; + bool set_full_part_if_bad_ret= FALSE; + + if (key_tree->left != &null_element) + { + if (-1 == (left_res= find_used_partitions(ppar,key_tree->left))) + return -1; + } + + if (key_tree->type == SEL_ARG::KEY_RANGE) + { + if (partno == 0 && (NULL != ppar->part_info->get_part_iter_for_interval)) + { + /* + Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and + we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change + */ + DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, + ppar->range_param. + key_parts);); + res= ppar->part_info-> + get_part_iter_for_interval(ppar->part_info, + FALSE, + key_tree->min_value, + key_tree->max_value, + key_tree->min_flag | key_tree->max_flag, + &ppar->part_iter); + if (!res) + goto go_right; /* res=0 --> no satisfying partitions */ + if (res == -1) + { + //get a full range iterator + init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); + } + /* + Save our intent to mark full partition as used if we will not be able + to obtain further limits on subpartitions + */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (partno == ppar->last_subpart_partno && + (NULL != ppar->part_info->get_subpart_iter_for_interval)) + { + PARTITION_ITERATOR subpart_iter; + DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, + ppar->range_param. + key_parts);); + res= ppar->part_info-> + get_subpart_iter_for_interval(ppar->part_info, + TRUE, + key_tree->min_value, + key_tree->max_value, + key_tree->min_flag | key_tree->max_flag, + &subpart_iter); + DBUG_ASSERT(res); /* We can't get "no satisfying subpartitions" */ + if (res == -1) + return -1; /* all subpartitions satisfy */ + + uint32 subpart_id; + bitmap_clear_all(&ppar->subparts_bitmap); + while ((subpart_id= subpart_iter.get_next(&subpart_iter)) != NOT_A_PARTITION_ID) + bitmap_set_bit(&ppar->subparts_bitmap, subpart_id); + + /* Mark each partition as used in each subpartition. */ + uint32 part_id; + while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != + NOT_A_PARTITION_ID) + { + for (uint i= 0; i < ppar->part_info->no_subparts; i++) + if (bitmap_is_set(&ppar->subparts_bitmap, i)) + bitmap_set_bit(&ppar->part_info->used_partitions, + part_id * ppar->part_info->no_subparts + i); + } + goto go_right; + } + + if (key_tree->is_singlepoint()) + { + pushed= TRUE; + ppar->cur_part_fields+= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno]; + *(ppar->arg_stack_end++) = key_tree; + + if (partno == ppar->last_part_partno && + ppar->cur_part_fields == ppar->part_fields) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning + fields. Save all constN constants into table record buffer. 
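(Editor's aside, illustrative only: for HASH-style partitioning the
        work done below degenerates to evaluating the partitioning function
        once over the saved constants, e.g.

          part_id= part_expr(const1, ..., constN) % no_parts;

        which is why a fully-bound single-point tuple always yields exactly
        one partition, remembered via init_single_partition_iterator().)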
+ */ + store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields); + DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack, + ppar->part_fields);); + uint32 part_id; + longlong func_value; + /* Find in which partition the {const1, ...,constN} tuple goes */ + if (ppar->get_top_partition_id_func(ppar->part_info, &part_id, + &func_value)) + { + res= 0; /* No satisfying partitions */ + goto pop_and_go_right; + } + /* Remember the limit we got - single partition #part_id */ + init_single_partition_iterator(part_id, &ppar->part_iter); + + /* + If there are no subpartitions/we fail to get any limit for them, + then we'll mark the full partition as used. + */ + set_full_part_if_bad_ret= TRUE; + goto process_next_key_part; + } + + if (partno == ppar->last_subpart_partno && + ppar->cur_subpart_fields == ppar->subpart_fields) + { + /* + Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning + fields. Save all constN constants into table record buffer. + */ + store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields, + ppar->subpart_fields); + DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end- + ppar->subpart_fields, + ppar->subpart_fields);); + /* Find the subpartition (it's HASH/KEY so we always have one) */ + partition_info *part_info= ppar->part_info; + uint32 subpart_id= part_info->get_subpartition_id(part_info); + + /* Mark this subpartition as used in each partition. */ + uint32 part_id; + while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != + NOT_A_PARTITION_ID) + { + bitmap_set_bit(&part_info->used_partitions, + part_id * part_info->no_subparts + subpart_id); + } + res= 1; /* Some partitions were marked as used */ + goto pop_and_go_right; + } + } + else + { + /* + Can't handle a condition on the current key part. If we're so deep + that we're processing the subpartitioning key parts, this means we + won't be able to infer any suitable condition, so bail out. + */ + if (partno >= ppar->last_part_partno) + return -1; + } + } + +process_next_key_part: + if (key_tree->next_key_part) + res= find_used_partitions(ppar, key_tree->next_key_part); + else + res= -1; + + if (set_full_part_if_bad_ret) + { + if (res == -1) + { + /* Got "full range" for subpartitioning fields */ + uint32 part_id; + bool found= FALSE; + while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != NOT_A_PARTITION_ID) + { + ppar->mark_full_partition_used(ppar->part_info, part_id); + found= TRUE; + } + res= test(found); + } + /* + Restore the "used partitions iterator" to the default setting that + specifies iteration over all partitions. + */ + init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); + } + + if (pushed) + { +pop_and_go_right: + /* Pop this key part info off the "stack" */ + ppar->arg_stack_end--; + ppar->cur_part_fields-= ppar->is_part_keypart[partno]; + ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno]; + } + + if (res == -1) + return -1; +go_right: + if (key_tree->right != &null_element) + { + if (-1 == (right_res= find_used_partitions(ppar,key_tree->right))) + return -1; + } + return (left_res || right_res || res); +} + + +static void mark_all_partitions_as_used(partition_info *part_info) +{ + bitmap_set_all(&part_info->used_partitions); +} + + +/* + Check if field types allow us to construct a partitioning index + description + + SYNOPSIS + fields_ok_for_partition_index() + pfield NULL-terminated array of pointers to fields.
+ DESCRIPTION + For an array of fields, check if we can use all of the fields to create + a partitioning index description. + + We can't process GEOMETRY fields - for these fields singlepoint intervals + can't be generated, and non-singlepoint intervals are "special" kinds of + intervals to which our processing logic can't be applied. + + It is not known if we could process ENUM fields, so they are disabled to be + on the safe side. + + RETURN + TRUE Yes, fields can be used in partitioning index + FALSE Otherwise +*/ + +static bool fields_ok_for_partition_index(Field **pfield) +{ + if (!pfield) + return FALSE; + for (; (*pfield); pfield++) + { + enum_field_types ftype= (*pfield)->real_type(); + if (ftype == FIELD_TYPE_ENUM || ftype == FIELD_TYPE_GEOMETRY) + return FALSE; + } + return TRUE; +} + + +/* + Create partition index description and fill related info in the context + struct + + SYNOPSIS + create_partition_index_description() + prune_par INOUT Partition pruning context + + DESCRIPTION + Create partition index description. Partition index description is: + + part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) + + If partitioning/sub-partitioning uses BLOB or Geometry fields, then the + corresponding fields_list(...) is not included in the index description + and we don't perform partition pruning for partitions/subpartitions. + + RETURN + TRUE Out of memory or can't do partition pruning at all + FALSE OK +*/ + +static bool create_partition_index_description(PART_PRUNE_PARAM *ppar) +{ + RANGE_OPT_PARAM *range_par= &(ppar->range_param); + partition_info *part_info= ppar->part_info; + uint used_part_fields, used_subpart_fields; + + used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? + part_info->no_part_fields : 0; + used_subpart_fields= + fields_ok_for_partition_index(part_info->subpart_field_array)? + part_info->no_subpart_fields : 0; + + uint total_parts= used_part_fields + used_subpart_fields; + + ppar->part_fields= used_part_fields; + ppar->last_part_partno= (int)used_part_fields - 1; + + ppar->subpart_fields= used_subpart_fields; + ppar->last_subpart_partno= + used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; + + if (part_info->is_sub_partitioned()) + { + ppar->mark_full_partition_used= mark_full_partition_used_with_parts; + ppar->get_top_partition_id_func= part_info->get_part_partition_id; + } + else + { + ppar->mark_full_partition_used= mark_full_partition_used_no_parts; + ppar->get_top_partition_id_func= part_info->get_partition_id; + } + + KEY_PART *key_part; + MEM_ROOT *alloc= range_par->mem_root; + if (!total_parts || + !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* + total_parts)) || + !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* + total_parts)) || + !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts)) || + !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* + total_parts))) + return TRUE; + + if (ppar->subpart_fields) + { + uint32 *buf; + uint32 bufsize= bitmap_buffer_size(ppar->part_info->no_subparts); + if (!(buf= (uint32*)alloc_root(alloc, bufsize))) + return TRUE; + bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->no_subparts, FALSE); + } + range_par->key_parts= key_part; + Field **field= (ppar->part_fields)?
part_info->part_field_array : + part_info->subpart_field_array; + bool in_subpart_fields= FALSE; + for (uint part= 0; part < total_parts; part++, key_part++) + { + key_part->key= 0; + key_part->part= part; + key_part->length= (*field)->pack_length_in_rec(); + /* + psergey-todo: check yet again if this is correct for tricky field types, + e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm() + */ + key_part->store_length= (*field)->pack_length(); + if ((*field)->real_maybe_null()) + key_part->store_length+= HA_KEY_NULL_LENGTH; + if ((*field)->type() == FIELD_TYPE_BLOB || + (*field)->real_type() == MYSQL_TYPE_VARCHAR) + key_part->store_length+= HA_KEY_BLOB_LENGTH; + + key_part->field= (*field); + key_part->image_type = Field::itRAW; + /* We don't set key_parts->null_bit as it will not be used */ + + ppar->is_part_keypart[part]= !in_subpart_fields; + ppar->is_subpart_keypart[part]= in_subpart_fields; + + if (!*(++field)) + { + field= part_info->subpart_field_array; + in_subpart_fields= TRUE; + } + } + range_par->key_parts_end= key_part; + + DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts, + range_par->key_parts_end);); + return FALSE; +} + + +#ifndef DBUG_OFF + +static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) +{ + DBUG_ENTER("print_partitioning_index"); + DBUG_LOCK_FILE; + fprintf(DBUG_FILE, "partitioning INDEX("); + for (KEY_PART *p=parts; p != parts_end; p++) + { + fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); + } + fputs(");\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +/* Print field value into debug trace, in NULL-aware way. */ +static void dbug_print_field(Field *field) +{ + if (field->is_real_null()) + fprintf(DBUG_FILE, "NULL"); + else + { + char buf[256]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); + String *pstr; + pstr= field->val_str(&str); + fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); + } +} + + +/* Print a "c1 < keypartX < c2" - type interval into debug trace. */ +static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) +{ + DBUG_ENTER("dbug_print_segment_range"); + DBUG_LOCK_FILE; + if (!(arg->min_flag & NO_MIN_RANGE)) + { + store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length); + dbug_print_field(part->field); + if (arg->min_flag & NEAR_MIN) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + } + + fprintf(DBUG_FILE, "%s", part->field->field_name); + + if (!(arg->max_flag & NO_MAX_RANGE)) + { + if (arg->max_flag & NEAR_MAX) + fputs(" < ", DBUG_FILE); + else + fputs(" <= ", DBUG_FILE); + store_key_image_to_rec(part->field, (char*)(arg->max_value), part->length); + dbug_print_field(part->field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + + +/* + Print a singlepoint multi-keypart range interval to debug trace + + SYNOPSIS + dbug_print_singlepoint_range() + start Array of SEL_ARG* ptrs representing conditions on key parts + num Number of elements in the array. + + DESCRIPTION + This function prints a "keypartN=constN AND ... AND keypartK=constK"-type + interval to debug trace. 
+*/ + +static void dbug_print_singlepoint_range(SEL_ARG **start, uint num) +{ + DBUG_ENTER("dbug_print_singlepoint_range"); + DBUG_LOCK_FILE; + SEL_ARG **end= start + num; + + for (SEL_ARG **arg= start; arg != end; arg++) + { + Field *field= (*arg)->field; + fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); + dbug_print_field(field); + } + fputs("\n", DBUG_FILE); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} +#endif + +/**************************************************************************** + * Partition pruning code ends + ****************************************************************************/ +#endif + /* Get cost of 'sweep' full records retrieval. @@ -2404,7 +3597,7 @@ static ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) { ROR_SCAN_INFO *ror_scan; - uchar *bitmap_buf; + uint32 *bitmap_buf; uint keynr; DBUG_ENTER("make_ror_scan"); @@ -2419,8 +3612,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) ror_scan->sel_arg= sel_arg; ror_scan->records= param->table->quick_rows[keynr]; - if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, - param->fields_bitmap_size))) + if (!(bitmap_buf= (uint32*)alloc_root(param->mem_root, + param->fields_bitmap_size))) DBUG_RETURN(NULL); if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, @@ -2534,12 +3727,13 @@ static ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) { ROR_INTERSECT_INFO *info; - uchar* buf; + uint32* buf; if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, sizeof(ROR_INTERSECT_INFO)))) return NULL; info->param= param; - if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + if (!(buf= (uint32*)alloc_root(param->mem_root, + param->fields_bitmap_size))) return NULL; if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, FALSE)) @@ -2556,7 +3750,7 @@ void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src) { dst->param= src->param; memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap, - src->covered_fields.bitmap_size); + no_bytes_in_map(&src->covered_fields)); dst->out_rows= src->out_rows; dst->is_covering= src->is_covering; dst->index_records= src->index_records; @@ -3098,9 +4292,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, /*I=set of all covering indexes */ ror_scan_mark= tree->ror_scans; - uchar buf[MAX_KEY/8+1]; + uint32 int_buf[MAX_KEY/32+1]; MY_BITMAP covered_fields; - if (bitmap_init(&covered_fields, buf, nbits, FALSE)) + if (bitmap_init(&covered_fields, int_buf, nbits, FALSE)) DBUG_RETURN(0); bitmap_clear_all(&covered_fields); @@ -3427,7 +4621,7 @@ QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, 0 on error */ -static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *lt_value, Item *gt_value, Item_result cmp_type) @@ -3462,7 +4656,7 @@ static SEL_TREE *get_ne_mm_tree(PARAM *param, Item_func *cond_func, Pointer to the tree built tree */ -static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, +static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, Field *field, Item *value, Item_result cmp_type, bool inv) { @@ -3555,7 +4749,7 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, /* make a select tree of all keys in condition */ -static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) { SEL_TREE 
*tree=0; SEL_TREE *ftree= 0; @@ -3728,7 +4922,7 @@ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) static SEL_TREE * -get_mm_parts(PARAM *param, COND *cond_func, Field *field, +get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field, Item_func::Functype type, Item *value, Item_result cmp_type) { @@ -3778,7 +4972,7 @@ get_mm_parts(PARAM *param, COND *cond_func, Field *field, static SEL_ARG * -get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, +get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value) { uint maybe_null=(uint) field->real_maybe_null(); @@ -3837,8 +5031,11 @@ get_mm_leaf(PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part, !(conf_func->compare_collation()->state & MY_CS_BINSORT)) goto end; - optimize_range= field->optimize_range(param->real_keynr[key_part->key], - key_part->part); + if (param->using_real_indexes) + optimize_range= field->optimize_range(param->real_keynr[key_part->key], + key_part->part); + else + optimize_range= TRUE; if (type == Item_func::LIKE_FUNC) { @@ -4105,7 +5302,7 @@ sel_add(SEL_ARG *key1,SEL_ARG *key2) static SEL_TREE * -tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) +tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) { DBUG_ENTER("tree_and"); if (!tree1) @@ -4175,7 +5372,8 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) using index_merge. */ -bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) +bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, + RANGE_OPT_PARAM* param) { key_map common_keys= tree1->keys_map; DBUG_ENTER("sel_trees_can_be_ored"); @@ -4201,8 +5399,84 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, PARAM* param) DBUG_RETURN(FALSE); } + +/* + Remove the trees that are not suitable for record retrieval. + SYNOPSIS + param Range analysis parameter + tree Tree to be processed, tree->type is KEY or KEY_SMALLER + + DESCRIPTION + This function walks through tree->keys[] and removes the SEL_ARG* trees + that are not "maybe" trees (*) and cannot be used to construct quick range + selects. + (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of + these types here as well. + + A SEL_ARG* tree cannot be used to construct quick select if it has + tree->part != 0. (e.g. it could represent "keypart2 < const"). + + WHY THIS FUNCTION IS NEEDED + + Normally we allow construction of SEL_TREE objects that have SEL_ARG + trees that do not allow quick range select construction. For example for + " keypart1=1 AND keypart2=2 " the execution will proceed as follows: + tree1= SEL_TREE { SEL_ARG{keypart1=1} } + tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select + from this + call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG + tree. + + There is an exception though: when we construct index_merge SEL_TREE, + any SEL_ARG* tree that cannot be used to construct quick range select can + be removed, because current range analysis code doesn't provide any way + that tree could be later combined with another tree. + Consider an example: we should not construct + st1 = SEL_TREE { + merges = SEL_IMERGE { + SEL_TREE(t.key1part1 = 1), + SEL_TREE(t.key2part2 = 2) -- (*) + } + }; + because + - (*) cannot be used to construct quick range select, + - There is no execution path that would cause (*) to be converted to + a tree that could be used. 
+
+    The latter is easy to verify: first, notice that the only way to convert
+    (*) into a usable tree is to call tree_and(something, (*)).
+
+    Second, look at what the tree_and/tree_or functions would do when passed
+    a SEL_TREE that has a structure like st1's, and conclude that
+    tree_and(something, (*)) will not be called.
+
+  RETURN
+    0  Ok, some suitable trees left
+    1  No tree->keys[] left.
+*/
+
+static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
+{
+  bool res= FALSE;
+  for (uint i=0; i < param->keys; i++)
+  {
+    if (tree->keys[i])
+    {
+      if (tree->keys[i]->part)
+      {
+        tree->keys[i]= NULL;
+        tree->keys_map.clear_bit(i);
+      }
+      else
+        res= TRUE;
+    }
+  }
+  return !res;
+}
+
+
 static SEL_TREE *
-tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
+tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
 {
   DBUG_ENTER("tree_or");
   if (!tree1 || !tree2)
@@ -4244,6 +5518,13 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
   /* ok, two trees have KEY type but cannot be used without index merge */
   if (tree1->merges.is_empty() && tree2->merges.is_empty())
   {
+    if (param->remove_jump_scans)
+    {
+      bool no_trees= remove_nonrange_trees(param, tree1);
+      no_trees= no_trees || remove_nonrange_trees(param, tree2);
+      if (no_trees)
+        DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
+    }
     SEL_IMERGE *merge;
     /* both trees are "range" trees, produce new index merge structure */
     if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
@@ -4266,7 +5547,9 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
     /* one tree is index merge tree and another is range tree */
     if (tree1->merges.is_empty())
       swap_variables(SEL_TREE*, tree1, tree2);
-
+
+    if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
+      DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
     /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
     if (imerge_list_or_tree(param, &tree1->merges, tree2))
       result= new SEL_TREE(SEL_TREE::ALWAYS);
@@ -5883,7 +7166,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
     (This also creates a deficiency - it is possible that we will retrieve
      parts of key that are not used by current query at all.)
   */
-  if (head->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY))
+  if (head->file->ha_retrieve_all_pk())
    DBUG_RETURN(1);
 
   cur_quick_it.rewind();
@@ -6153,7 +7436,7 @@ int QUICK_RANGE_SELECT::reset()
   in_range= FALSE;
   cur_range= (QUICK_RANGE**) ranges.buffer;
 
-  if (file->inited == handler::NONE && (error= file->ha_index_init(index)))
+  if (file->inited == handler::NONE && (error= file->ha_index_init(index,1)))
     DBUG_RETURN(error);
 
   /* Do not allocate the buffers twice.
*/ @@ -6208,6 +7491,14 @@ int QUICK_RANGE_SELECT::reset() multi_range_buff->buffer= mrange_buff; multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz; multi_range_buff->end_of_used_area= mrange_buff; +#ifdef HAVE_purify + /* + We need this until ndb will use the buffer efficiently + (Now ndb stores complete row in here, instead of only the used fields + which gives us valgrind warnings in compare_record[]) + */ + bzero((char*) mrange_buff, mrange_bufsiz); +#endif } DBUG_RETURN(0); } @@ -6412,7 +7703,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next() (byte*) range->min_key, range->min_length, (ha_rkey_function)(range->flag ^ GEOM_FLAG)); - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range } @@ -6555,7 +7846,7 @@ int QUICK_SELECT_DESC::get_next() } if (result) { - if (result != HA_ERR_KEY_NOT_FOUND) + if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE) DBUG_RETURN(result); range=0; // Not found, to next range continue; @@ -8238,7 +9529,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void) DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset"); file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */ - result= file->ha_index_init(index); + result= file->ha_index_init(index, 1); result= file->index_last(record); if (result == HA_ERR_END_OF_FILE) DBUG_RETURN(0); @@ -8314,7 +9605,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() DBUG_ASSERT(is_last_prefix <= 0); if (result == HA_ERR_KEY_NOT_FOUND) continue; - else if (result) + if (result) break; if (have_min) @@ -8344,10 +9635,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() HA_READ_KEY_EXACT); result= have_min ? min_res : have_max ? max_res : result; - } - while (result == HA_ERR_KEY_NOT_FOUND && is_last_prefix != 0); + } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + is_last_prefix != 0); if (result == 0) + { /* Partially mimic the behavior of end_select_send. Copy the field data from Item_field::field into Item_field::result_field @@ -8355,6 +9647,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() other fields in non-ANSI SQL mode). */ copy_fields(&join->tmp_table_param); + } else if (result == HA_ERR_KEY_NOT_FOUND) result= HA_ERR_END_OF_FILE; @@ -8381,6 +9674,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::get_next() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions. + HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8434,7 +9728,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) key_restore(record, tmp_record, index_info, 0); } - else if (result == HA_ERR_KEY_NOT_FOUND) + else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) result= 0; /* There is a result in any case. */ } } @@ -8459,6 +9753,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min() RETURN 0 on success HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions. 
+ HA_ERR_END_OF_FILE - "" - other if some error occurred */ @@ -8559,6 +9854,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8603,11 +9899,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && - (cur_range->flag & (EQ_RANGE | NULL_RANGE))) - continue; /* Check the next range. */ - else if (result) + if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & (EQ_RANGE | NULL_RANGE))) + continue; /* Check the next range. */ + /* In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE, HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this @@ -8634,7 +9931,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() /* Check if record belongs to the current group. */ if (key_cmp(index_info->key_part, group_prefix, real_prefix_len)) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } @@ -8652,7 +9949,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) || (cmp_res <= 0))) { - result = HA_ERR_KEY_NOT_FOUND; + result= HA_ERR_KEY_NOT_FOUND; continue; } } @@ -8691,6 +9988,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range() 0 on success HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of the ranges + HA_ERR_END_OF_FILE - "" - other if some error */ @@ -8736,10 +10034,12 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range() result= file->index_read(record, group_prefix, search_prefix_len, find_flag); - if ((result == HA_ERR_KEY_NOT_FOUND) && (cur_range->flag & EQ_RANGE)) - continue; /* Check the next range. */ if (result) { + if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) && + (cur_range->flag & EQ_RANGE)) + continue; /* Check the next range. */ + /* In no key was found with this upper bound, there certainly are no keys in the ranges to the left. @@ -8876,8 +10176,6 @@ static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, int idx; char buff[1024]; DBUG_ENTER("print_sel_tree"); - if (! _db_on_) - DBUG_VOID_RETURN; String tmp(buff,sizeof(buff),&my_charset_bin); tmp.length(0); @@ -8906,9 +10204,7 @@ static void print_ror_scans_arr(TABLE *table, const char *msg, struct st_ror_scan_info **start, struct st_ror_scan_info **end) { - DBUG_ENTER("print_ror_scans"); - if (! _db_on_) - DBUG_VOID_RETURN; + DBUG_ENTER("print_ror_scans_arr"); char buff[1024]; String tmp(buff,sizeof(buff),&my_charset_bin); @@ -8972,7 +10268,7 @@ static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) { char buf[MAX_KEY/8+1]; DBUG_ENTER("print_quick"); - if (! 
_db_on_ || !quick)
+  if (!quick)
     DBUG_VOID_RETURN;
 
   DBUG_LOCK_FILE;
diff --git a/sql/opt_range.h b/sql/opt_range.h
index cdb00ea7d0c..bc2496b0769 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -249,6 +249,7 @@ public:
 
 struct st_qsel_param;
+class PARAM;
 class SEL_ARG;
 
 /*
@@ -283,12 +284,12 @@ protected:
   QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                                struct st_table_ref *ref,
                                                ha_rows records);
-  friend bool get_quick_keys(struct st_qsel_param *param,
+  friend bool get_quick_keys(PARAM *param,
                              QUICK_RANGE_SELECT *quick,KEY_PART *key,
                              SEL_ARG *key_tree,
                              char *min_key, uint min_key_flag,
                              char *max_key, uint max_key_flag);
-  friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx,
+  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
                                               SEL_ARG *key_tree,
                                               MEM_ROOT *alloc);
   friend class QUICK_SELECT_DESC;
@@ -718,4 +719,9 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
                                              ha_rows records);
 uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit);
 
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond);
+void store_key_image_to_rec(Field *field, char *ptr, uint len);
+#endif
+
 #endif
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index ed8e694dcb7..4158031b9a9 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -205,7 +205,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
         const_result= 0;
         break;
       }
-      error= table->file->ha_index_init((uint) ref.key);
+      error= table->file->ha_index_init((uint) ref.key, 1);
 
       if (!ref.key_length)
         error= table->file->index_first(table->record[0]);
@@ -292,7 +292,7 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
         const_result= 0;
         break;
       }
-      error= table->file->ha_index_init((uint) ref.key);
+      error= table->file->ha_index_init((uint) ref.key, 1);
 
       if (!ref.key_length)
         error= table->file->index_last(table->record[0]);
diff --git a/sql/parse_file.cc b/sql/parse_file.cc
index 041b770ac0b..2a602e9ba28 100644
--- a/sql/parse_file.cc
+++ b/sql/parse_file.cc
@@ -224,10 +224,23 @@ sql_create_definition_file(const LEX_STRING *dir, const LEX_STRING *file_name,
   File_option *param;
   DBUG_ENTER("sql_create_definition_file");
   DBUG_PRINT("enter", ("Dir: %s, file: %s, base 0x%lx",
-                       dir->str, file_name->str, (ulong) base));
+                       dir ? dir->str : "(null)",
+                       file_name->str, (ulong) base));
 
-  fn_format(path, file_name->str, dir->str, 0, MY_UNPACK_FILENAME);
-  path_end= strlen(path);
+  if (dir)
+  {
+    fn_format(path, file_name->str, dir->str, 0, MY_UNPACK_FILENAME);
+    path_end= strlen(path);
+  }
+  else
+  {
+    /*
+      If no dir is passed, it means file_name is a full path,
+      including the dir name, the file name itself, and an extension,
+      and with unpack_filename() executed over it.
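+
+      A hypothetical illustration of the two calling conventions:
+        dir->str= "/data/mysql/test", file_name->str= "t1.frm"
+      versus
+        dir= NULL, file_name->str= "/data/mysql/test/t1.frm"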
+    */
+    path_end= strxnmov(path, FN_REFLEN, file_name->str, NullS) - path;
+  }
 
   // temporary file name
   path[path_end]='~';
@@ -355,11 +368,11 @@ my_bool rename_in_schema_file(const char *schema, const char *old_name,
 {
   char old_path[FN_REFLEN], new_path[FN_REFLEN], arc_path[FN_REFLEN];
 
-  strxnmov(old_path, FN_REFLEN, mysql_data_home, "/", schema, "/",
+  strxnmov(old_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/",
            old_name, reg_ext, NullS);
   (void) unpack_filename(old_path, old_path);
 
-  strxnmov(new_path, FN_REFLEN, mysql_data_home, "/", schema, "/",
+  strxnmov(new_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/",
            new_name, reg_ext, NullS);
   (void) unpack_filename(new_path, new_path);
 
@@ -367,7 +380,7 @@ my_bool rename_in_schema_file(const char *schema, const char *old_name,
     return 1;
 
   /* check if arc_dir exists */
-  strxnmov(arc_path, FN_REFLEN, mysql_data_home, "/", schema, "/arc", NullS);
+  strxnmov(arc_path, FN_REFLEN-1, mysql_data_home, "/", schema, "/arc", NullS);
   (void) unpack_filename(arc_path, arc_path);
 
   if (revision > 0 && !access(arc_path, F_OK))
@@ -414,7 +427,7 @@ sql_parse_prepare(const LEX_STRING *file_name, MEM_ROOT *mem_root,
   char *end, *sign;
   File_parser *parser;
   File file;
-  DBUG_ENTER("sql__parse_prepare");
+  DBUG_ENTER("sql_parse_prepare");
 
   if (!my_stat(file_name->str, &stat_info, MYF(MY_WME)))
   {
diff --git a/sql/partition_element.h b/sql/partition_element.h
new file mode 100644
index 00000000000..8a11c332897
--- /dev/null
+++ b/sql/partition_element.h
@@ -0,0 +1,66 @@
+/* Copyright (C) 2000,2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/**
+ * An enum and a struct to handle partitioning and subpartitioning.
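+ *
+ * As an illustration (not part of the original comment), the enum values
+ * below roughly correspond to the SQL syntax:
+ *
+ *   PARTITION BY RANGE (expr)      -> RANGE_PARTITION
+ *   PARTITION BY LIST (expr)       -> LIST_PARTITION
+ *   PARTITION BY HASH/KEY (...)    -> HASH_PARTITION
+ *   no PARTITION BY clause         -> NOT_A_PARTITION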
+ */ +enum partition_type { + NOT_A_PARTITION= 0, + RANGE_PARTITION, + HASH_PARTITION, + LIST_PARTITION +}; + +enum partition_state { + PART_NORMAL= 0, + PART_IS_DROPPED= 1, + PART_TO_BE_DROPPED= 2, + PART_TO_BE_ADDED= 3, + PART_TO_BE_REORGED= 4, + PART_REORGED_DROPPED= 5, + PART_CHANGED= 6, + PART_IS_CHANGED= 7, + PART_IS_ADDED= 8 +}; + +class partition_element :public Sql_alloc { +public: + List<partition_element> subpartitions; + List<longlong> list_val_list; + ulonglong part_max_rows; + ulonglong part_min_rows; + char *partition_name; + char *tablespace_name; + longlong range_value; + char* part_comment; + char* data_file_name; + char* index_file_name; + handlerton *engine_type; + enum partition_state part_state; + uint16 nodegroup_id; + + partition_element() + : part_max_rows(0), part_min_rows(0), partition_name(NULL), + tablespace_name(NULL), range_value(0), part_comment(NULL), + data_file_name(NULL), index_file_name(NULL), + engine_type(NULL),part_state(PART_NORMAL), + nodegroup_id(UNDEF_NODEGROUP) + { + subpartitions.empty(); + list_val_list.empty(); + } + ~partition_element() {} +}; diff --git a/sql/partition_info.cc b/sql/partition_info.cc new file mode 100644 index 00000000000..66e0d366116 --- /dev/null +++ b/sql/partition_info.cc @@ -0,0 +1,349 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Some general useful functions */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "ha_partition.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE + + +/* + Create a memory area where default partition names are stored and fill it + up with the names. + + SYNOPSIS + create_default_partition_names() + part_no Partition number for subparts + no_parts Number of partitions + start_no Starting partition number + subpart Is it subpartitions + + RETURN VALUE + A pointer to the memory area of the default partition names + + DESCRIPTION + A support routine for the partition code where default values are + generated. + The external routine needing this code is check_partition_info +*/ + +#define MAX_PART_NAME_SIZE 16 + +char *partition_info::create_default_partition_names(uint part_no, uint no_parts, + uint start_no, bool is_subpart) +{ + char *ptr= sql_calloc(no_parts*MAX_PART_NAME_SIZE); + char *move_ptr= ptr; + uint i= 0; + DBUG_ENTER("create_default_partition_names"); + + if (likely(ptr != 0)) + { + do + { + if (is_subpart) + my_sprintf(move_ptr, (move_ptr,"p%usp%u", part_no, (start_no + i))); + else + my_sprintf(move_ptr, (move_ptr,"p%u", (start_no + i))); + move_ptr+=MAX_PART_NAME_SIZE; + } while (++i < no_parts); + } + else + { + mem_alloc_error(no_parts*MAX_PART_NAME_SIZE); + } + DBUG_RETURN(ptr); +} + + +/* + Set up all the default partitions not set-up by the user in the SQL + statement. 
Also perform a number of checks that the user hasn't tried + to use default values where no defaults exists. + + SYNOPSIS + set_up_default_partitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + start_no Starting partition number + + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine only accepts HASH and KEY partitioning and thus there is + no subpartitioning if this routine is successful. + The external routine needing this code is check_partition_info +*/ + +bool partition_info::set_up_default_partitions(handler *file, ulonglong max_rows, + uint start_no) +{ + uint i; + char *default_name; + bool result= TRUE; + DBUG_ENTER("partition_info::set_up_default_partitions"); + + if (part_type != HASH_PARTITION) + { + const char *error_string; + if (part_type == RANGE_PARTITION) + error_string= partition_keywords[PKW_RANGE].str; + else + error_string= partition_keywords[PKW_LIST].str; + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_string); + goto end; + } + if (no_parts == 0) + no_parts= file->get_default_no_partitions(max_rows); + if (unlikely(no_parts > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if (unlikely((!(default_name= create_default_partition_names(0, no_parts, + start_no, + FALSE))))) + goto end; + i= 0; + do + { + partition_element *part_elem= new partition_element(); + if (likely(part_elem != 0 && + (!partitions.push_back(part_elem)))) + { + part_elem->engine_type= default_engine_type; + part_elem->partition_name= default_name; + default_name+=MAX_PART_NAME_SIZE; + } + else + { + mem_alloc_error(sizeof(partition_element)); + goto end; + } + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + Set up all the default subpartitions not set-up by the user in the SQL + statement. Also perform a number of checks that the default partitioning + becomes an allowed partitioning scheme. + + SYNOPSIS + set_up_default_subpartitions() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + + RETURN VALUE + TRUE Error, attempted default values not possible + FALSE Ok, default partitions set-up + + DESCRIPTION + The routine uses the underlying handler of the partitioning to define + the default number of partitions. For some handlers this requires + knowledge of the maximum number of rows to be stored in the table. + This routine is only called for RANGE or LIST partitioning and those + need to be specified so only subpartitions are specified. 
+    The external routine needing this code is check_partition_info
+*/
+
+bool partition_info::set_up_default_subpartitions(handler *file,
+                                                  ulonglong max_rows)
+{
+  uint i, j;
+  char *default_name, *name_ptr;
+  bool result= TRUE;
+  partition_element *part_elem;
+  List_iterator<partition_element> part_it(partitions);
+  DBUG_ENTER("partition_info::set_up_default_subpartitions");
+
+  if (no_subparts == 0)
+    no_subparts= file->get_default_no_partitions(max_rows);
+  if (unlikely((no_parts * no_subparts) > MAX_PARTITIONS))
+  {
+    my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
+    goto end;
+  }
+  i= 0;
+  do
+  {
+    part_elem= part_it++;
+    j= 0;
+    name_ptr= create_default_partition_names(i, no_subparts, (uint)0, TRUE);
+    if (unlikely(!name_ptr))
+      goto end;
+    do
+    {
+      partition_element *subpart_elem= new partition_element();
+      if (likely(subpart_elem != 0 &&
+          (!part_elem->subpartitions.push_back(subpart_elem))))
+      {
+        subpart_elem->engine_type= default_engine_type;
+        subpart_elem->partition_name= name_ptr;
+        name_ptr+= MAX_PART_NAME_SIZE;
+      }
+      else
+      {
+        mem_alloc_error(sizeof(partition_element));
+        goto end;
+      }
+    } while (++j < no_subparts);
+  } while (++i < no_parts);
+  result= FALSE;
+end:
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Support routine for check_partition_info
+
+  SYNOPSIS
+    set_up_defaults_for_partitioning()
+    part_info           The reference to all partition information
+    file                A reference to a handler of the table
+    max_rows            Maximum number of rows stored in the table
+    start_no            Starting partition number
+
+  RETURN VALUE
+    TRUE                Error, attempted default values not possible
+    FALSE               Ok, default partitions set-up
+
+  DESCRIPTION
+    Set up defaults for partition or subpartition (cannot set up both;
+    attempting to do so returns an error).
+*/
+
+bool partition_info::set_up_defaults_for_partitioning(handler *file,
+                                                      ulonglong max_rows,
+                                                      uint start_no)
+{
+  DBUG_ENTER("partition_info::set_up_defaults_for_partitioning");
+
+  if (!default_partitions_setup)
+  {
+    default_partitions_setup= TRUE;
+    if (use_default_partitions)
+      DBUG_RETURN(set_up_default_partitions(file, max_rows, start_no));
+    if (is_sub_partitioned() &&
+        use_default_subpartitions)
+      DBUG_RETURN(set_up_default_subpartitions(file, max_rows));
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  A support function to check if a partition element's name is unique
+
+  SYNOPSIS
+    has_unique_name()
+    element            Element to check
+
+  RETURN VALUES
+    TRUE               Has unique name
+    FALSE              Doesn't
+*/
+
+bool partition_info::has_unique_name(partition_element *element)
+{
+  DBUG_ENTER("partition_info::has_unique_name");
+
+  const char *name_to_check= element->partition_name;
+  List_iterator<partition_element> parts_it(partitions);
+
+  partition_element *el;
+  while ((el= (parts_it++)))
+  {
+    if (!(my_strcasecmp(system_charset_info, el->partition_name,
                        name_to_check)) && el != element)
+      DBUG_RETURN(FALSE);
+
+    if (!el->subpartitions.is_empty())
+    {
+      partition_element *sub_el;
+      List_iterator<partition_element> subparts_it(el->subpartitions);
+      while ((sub_el= (subparts_it++)))
+      {
+        if (!(my_strcasecmp(system_charset_info, sub_el->partition_name,
+                            name_to_check)) && sub_el != element)
+          DBUG_RETURN(FALSE);
+      }
+    }
+  }
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  A support function to check partition names for duplication in a
+  partitioned table
+
+  SYNOPSIS
+    has_unique_names()
+
+  RETURN VALUES
+    NULL               All partition and subpartition names are unique
+    != NULL            Pointer to the first duplicated name found
+
+  DESCRIPTION
+    Checks that the list of names in the partitions doesn't contain any
+    duplicated names.
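+
+    For example, a (hypothetical) statement such as
+
+      CREATE TABLE t1 (a INT)
+      PARTITION BY RANGE (a)
+        (PARTITION p0 VALUES LESS THAN (10),
+         PARTITION p0 VALUES LESS THAN (20));
+
+    would make this function return "p0", whereas a table whose partition
+    and subpartition names are all distinct makes it return NULL.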
+*/
+
+char *partition_info::has_unique_names()
+{
+  DBUG_ENTER("partition_info::has_unique_names");
+
+  List_iterator<partition_element> parts_it(partitions);
+
+  partition_element *el;
+  while ((el= (parts_it++)))
+  {
+    if (! has_unique_name(el))
+      DBUG_RETURN(el->partition_name);
+
+    if (!el->subpartitions.is_empty())
+    {
+      List_iterator<partition_element> subparts_it(el->subpartitions);
+      partition_element *subel;
+      while ((subel= (subparts_it++)))
+      {
+        if (! has_unique_name(subel))
+          DBUG_RETURN(subel->partition_name);
+      }
+    }
+  }
+  DBUG_RETURN(NULL);
+}
+
+#endif /* WITH_PARTITION_STORAGE_ENGINE */
diff --git a/sql/partition_info.h b/sql/partition_info.h
new file mode 100644
index 00000000000..c8cb4ae407a
--- /dev/null
+++ b/sql/partition_info.h
@@ -0,0 +1,271 @@
+/* Copyright (C) 2000,2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                       /* gcc class implementation */
+#endif
+
+#include "partition_element.h"
+
+class partition_info;
+
+/* Some function typedefs */
+typedef int (*get_part_id_func)(partition_info *part_info,
+                                uint32 *part_id,
+                                longlong *func_value);
+typedef uint32 (*get_subpart_id_func)(partition_info *part_info);
+
+
+
+class partition_info : public Sql_alloc
+{
+public:
+  /*
+   * Here comes a set of definitions needed for partitioned table handlers.
+   */
+  List<partition_element> partitions;
+  List<partition_element> temp_partitions;
+
+  List<char> part_field_list;
+  List<char> subpart_field_list;
+
+  /*
+    If there is no subpartitioning, use only this func to get partition ids.
+    If there is subpartitioning, use this func to get the partition id when
+    you have both partition and subpartition fields.
+  */
+  get_part_id_func get_partition_id;
+
+  /* Get partition id when we don't have subpartition fields */
+  get_part_id_func get_part_partition_id;
+
+  /*
+    Get the subpartition id when we don't have partition fields but we do
+    have subpartition fields.
+    Mikael said that for a given constant tuple
+    {subpart_field1, ..., subpart_fieldN} the subpartition id will be the
+    same in all subpartitions
+  */
+  get_subpart_id_func get_subpartition_id;
+
+  /* NULL-terminated array of fields used in the partitioning expression */
+  Field **part_field_array;
+  /* NULL-terminated array of fields used in the subpartitioning expression */
+  Field **subpart_field_array;
+
+  /*
+    Array of all fields used in partition and subpartition expression,
+    without duplicates, NULL-terminated.
+  */
+  Field **full_part_field_array;
+
+  Item *part_expr;
+  Item *subpart_expr;
+
+  Item *item_free_list;
+
+  /*
+    A bitmap of partitions used by the current query.
+    Usage pattern:
+    * The handler->extra(HA_EXTRA_RESET) call at query start/end sets all
+      partitions to be unused.
+    * Before index/rnd_init(), partition pruning code sets the bits for used
+      partitions.
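+
+    A sketch of the intended reading pattern (hypothetical handler code;
+    bitmap_is_set() is the my_bitmap primitive):
+
+      for (uint id= 0; id < tot_parts; id++)
+        if (bitmap_is_set(&part_info->used_partitions, id))
+          ... scan partition number id ...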
+  */
+  MY_BITMAP used_partitions;
+
+  union {
+    longlong *range_int_array;
+    LIST_PART_ENTRY *list_array;
+  };
+
+  /********************************************
+   * INTERVAL ANALYSIS
+   ********************************************/
+  /*
+    Partitioning interval analysis function for partitioning, or NULL if
+    interval analysis is not supported for this kind of partitioning.
+  */
+  get_partitions_in_range_iter get_part_iter_for_interval;
+  /*
+    Partitioning interval analysis function for subpartitioning, or NULL if
+    interval analysis is not supported for this kind of partitioning.
+  */
+  get_partitions_in_range_iter get_subpart_iter_for_interval;
+
+  /*
+    Valid iff
+    get_part_iter_for_interval=get_part_iter_for_interval_via_walking:
+    controls how we'll process "field < C" and "field > C" intervals.
+    If the partitioning function F is strictly increasing, then for any x, y
+    "x < y" => "F(x) < F(y)" (*), i.e. when we get interval "field < C"
+    we can perform partition pruning on the equivalent "F(field) < F(C)".
+
+    If the partitioning function F is not strictly increasing (it is simply
+    increasing), then instead of (*) we get "x < y" => "F(x) <= F(y)",
+    i.e. for interval "field < C" we can perform partition pruning for
+    "F(field) <= F(C)".
+  */
+  bool range_analysis_include_bounds;
+  /********************************************
+   * INTERVAL ANALYSIS ENDS
+   ********************************************/
+
+  char* part_info_string;
+
+  char *part_func_string;
+  char *subpart_func_string;
+
+  uchar *part_state;
+
+  partition_element *curr_part_elem;
+  partition_element *current_partition;
+  /*
+    These key_maps are used for Partitioning to enable quick decisions
+    on whether we can derive more information about which partition to
+    scan just by looking at what index is used.
+  */
+  key_map all_fields_in_PF, all_fields_in_PPF, all_fields_in_SPF;
+  key_map some_fields_in_PF;
+
+  handlerton *default_engine_type;
+  Item_result part_result_type;
+  partition_type part_type;
+  partition_type subpart_type;
+
+  uint part_info_len;
+  uint part_state_len;
+  uint part_func_len;
+  uint subpart_func_len;
+
+  uint no_parts;
+  uint no_subparts;
+  uint count_curr_subparts;
+
+  uint part_error_code;
+
+  uint no_list_values;
+
+  uint no_part_fields;
+  uint no_subpart_fields;
+  uint no_full_part_fields;
+
+  /*
+    This variable is used to calculate the partition id when using
+    LINEAR KEY/HASH. This functionality is kept in the MySQL Server
+    but is mainly of use to handlers supporting partitioning.
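+
+    As an illustration (assuming the usual linear-hash scheme, which this
+    comment does not spell out): for no_parts= 6 the mask would be 7, i.e.
+    the next power of two minus one; a hash value is ANDed with the mask,
+    and a result >= no_parts is ANDed again with the mask shifted right
+    by one bit.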
+ */ + uint16 linear_hash_mask; + + bool use_default_partitions; + bool use_default_no_partitions; + bool use_default_subpartitions; + bool use_default_no_subpartitions; + bool default_partitions_setup; + bool defined_max_value; + bool list_of_part_fields; + bool list_of_subpart_fields; + bool linear_hash_ind; + bool fixed; + bool from_openfrm; + + partition_info() + : get_partition_id(NULL), get_part_partition_id(NULL), + get_subpartition_id(NULL), + part_field_array(NULL), subpart_field_array(NULL), + full_part_field_array(NULL), + part_expr(NULL), subpart_expr(NULL), item_free_list(NULL), + list_array(NULL), + part_info_string(NULL), + part_func_string(NULL), subpart_func_string(NULL), + part_state(NULL), + curr_part_elem(NULL), current_partition(NULL), + default_engine_type(NULL), + part_result_type(INT_RESULT), + part_type(NOT_A_PARTITION), subpart_type(NOT_A_PARTITION), + part_info_len(0), part_state_len(0), + part_func_len(0), subpart_func_len(0), + no_parts(0), no_subparts(0), + count_curr_subparts(0), part_error_code(0), + no_list_values(0), no_part_fields(0), no_subpart_fields(0), + no_full_part_fields(0), linear_hash_mask(0), + use_default_partitions(TRUE), + use_default_no_partitions(TRUE), + use_default_subpartitions(TRUE), + use_default_no_subpartitions(TRUE), + default_partitions_setup(FALSE), + defined_max_value(FALSE), + list_of_part_fields(FALSE), list_of_subpart_fields(FALSE), + linear_hash_ind(FALSE), + fixed(FALSE), + from_openfrm(FALSE) + { + all_fields_in_PF.clear_all(); + all_fields_in_PPF.clear_all(); + all_fields_in_SPF.clear_all(); + some_fields_in_PF.clear_all(); + partitions.empty(); + temp_partitions.empty(); + part_field_list.empty(); + subpart_field_list.empty(); + } + ~partition_info() {} + + /* Answers the question if subpartitioning is used for a certain table */ + bool is_sub_partitioned() + { + return (subpart_type == NOT_A_PARTITION ? FALSE : TRUE); + } + + /* Returns the total number of partitions on the leaf level */ + uint get_tot_partitions() + { + return no_parts * (is_sub_partitioned() ? 
no_subparts : 1);
+  }
+
+  bool set_up_defaults_for_partitioning(handler *file, ulonglong max_rows,
+                                        uint start_no);
+  char *has_unique_names();
+private:
+  bool set_up_default_partitions(handler *file, ulonglong max_rows,
+                                 uint start_no);
+  bool set_up_default_subpartitions(handler *file, ulonglong max_rows);
+  char *create_default_partition_names(uint part_no, uint no_parts,
+                                       uint start_no, bool is_subpart);
+  bool has_unique_name(partition_element *element);
+};
+
+uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
+
+/* Initialize the iterator to return a single partition with given part_id */
+
+static inline void init_single_partition_iterator(uint32 part_id,
+                                           PARTITION_ITERATOR *part_iter)
+{
+  part_iter->part_nums.start= part_id;
+  part_iter->part_nums.end=   part_id+1;
+  part_iter->get_next= get_next_partition_id_range;
+}
+
+/* Initialize the iterator to enumerate all partitions */
+static inline
+void init_all_partitions_iterator(partition_info *part_info,
+                                  PARTITION_ITERATOR *part_iter)
+{
+  part_iter->part_nums.start= 0;
+  part_iter->part_nums.end=   part_info->no_parts;
+  part_iter->get_next= get_next_partition_id_range;
+}
diff --git a/sql/records.cc b/sql/records.cc
index b352f9f395a..5cb9b1e5c47 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -65,7 +65,7 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
   table->status=0;			/* And it's always found */
   if (!table->file->inited)
   {
-    table->file->ha_index_init(idx);
+    table->file->ha_index_init(idx, 1);
     table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY);
   }
   /* read_record will be changed to rr_index in rr_index_first */
@@ -73,8 +73,74 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
 }
 
 
-/* init struct for read with info->read_record */
-
+/*
+  init_read_record is used to scan by using a number of different methods.
+  Which method to use is set up in this call so that later calls to
+  the info->read_record will call the appropriate method using a function
+  pointer.
+
+  There are five methods that all relate to the sort function
+  filesort. The result of a filesort is retrieved using read_record
+  calls. The other two methods are used for normal table access.
+
+  The filesort will produce references to the records sorted; these
+  references can be stored in memory or in a temporary file.
+
+  The temporary file is normally used when the references don't fit into
+  a properly sized memory buffer. For most small queries the references
+  are stored in the memory buffer.
+
+  The temporary file is also used when performing an update where a key is
+  modified.
+
+  Methods used when ref's are in memory (using rr_from_pointers):
+    rr_unpack_from_buffer:
+    ----------------------
+      This method is used when table->sort.addon_field is allocated.
+      This is allocated for most SELECT queries not involving any BLOB's.
+      In this case the records are fetched from a memory buffer.
+    rr_from_pointers:
+    -----------------
+      Used when the above is not true, that is for UPDATE, DELETE and so
+      forth, and for SELECT's involving BLOB's. It is also used when the
+      addon_field buffer is not allocated because its size was bigger than
+      the session variable max_length_for_sort_data.
+      In this case the record data is fetched from the handler using the
+      saved reference using the rnd_pos handler call.
+
+  Methods used when ref's are in a temporary file (using rr_from_tempfile):
+    rr_unpack_from_tempfile:
+    ------------------------
+      Same as rr_unpack_from_buffer except that references are fetched from
+      a temporary file. Should obviously not really happen other than in
+      strange configurations.
+
+    rr_from_tempfile:
+    -----------------
+      Same as rr_from_pointers except that references are fetched from a
+      temporary file instead of from a memory buffer.
+
+    rr_from_cache:
+    --------------
+      This is a special variant of rr_from_tempfile that can be used for
+      handlers that are not using the HA_FAST_KEY_READ table flag. Instead
+      of reading the references one by one from the temporary file it reads
+      a set of them, sorts them and reads all of them into a buffer which
+      is then used for a number of subsequent calls to rr_from_cache.
+      It is only used for SELECT queries, subject to a number of other
+      conditions on table size.
+
+  All other accesses use either index access methods (rr_quick) or a full
+  table scan (rr_sequential).
+    rr_quick:
+    ---------
+      rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
+      perform an index scan. There are loads of functionality hidden
+      in these quick classes. It handles all index scans of various kinds.
+    rr_sequential:
+    --------------
+      This is the most basic access method of a table using rnd_init,
+      rnd_next and rnd_end. No indexes are used.
+*/
 void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
 		      SQL_SELECT *select,
 		      int use_record_cache, bool print_error)
@@ -87,6 +153,10 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
   info->table=table;
   info->file= table->file;
   info->forms= &info->table;		/* Only one table */
+
+  if (table->s->tmp_table == TMP_TABLE && !table->sort.addon_field)
+    VOID(table->file->extra(HA_EXTRA_MMAP));
+
   if (table->sort.addon_field)
   {
     info->rec_buf= table->sort.addon_buf;
diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc
index 5cdd24afba4..34dcd80a236 100644
--- a/sql/repl_failsafe.cc
+++ b/sql/repl_failsafe.cc
@@ -20,6 +20,7 @@
 #include "repl_failsafe.h"
 #include "sql_repl.h"
 #include "slave.h"
+#include "rpl_filter.h"
 #include "log_event.h"
 #include <mysql.h>
 
@@ -732,14 +733,14 @@ static int fetch_db_tables(THD *thd, MYSQL *mysql, const char *db,
     TABLE_LIST table;
     const char* table_name= row[0];
     int error;
-    if (table_rules_on)
+    if (rpl_filter->is_on())
     {
       bzero((char*) &table, sizeof(table)); //just for safe
       table.db= (char*) db;
       table.table_name= (char*) table_name;
       table.updating= 1;
 
-      if (!tables_ok(thd, &table))
+      if (!rpl_filter->tables_ok(thd->db, &table))
	continue;
     }
     /* download master's table and overwrite slave's table */
@@ -858,8 +859,8 @@ bool load_master_data(THD* thd)
	 data from master
       */
 
-      if (!db_ok(db, replicate_do_db, replicate_ignore_db) ||
-	  !db_ok_with_wild_table(db) ||
+      if (!rpl_filter->db_ok(db) ||
+	  !rpl_filter->db_ok_with_wild_table(db) ||
	  !strcmp(db,"mysql"))
       {
	*cur_table_res = 0;
diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc
new file mode 100644
index 00000000000..143cd027b5f
--- /dev/null
+++ b/sql/rpl_filter.cc
@@ -0,0 +1,546 @@
+/* Copyright (C) 2000-2003 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "rpl_filter.h" + +#define TABLE_RULE_HASH_SIZE 16 +#define TABLE_RULE_ARR_SIZE 16 + +Rpl_filter::Rpl_filter() : + table_rules_on(0), do_table_inited(0), ignore_table_inited(0), + wild_do_table_inited(0), wild_ignore_table_inited(0) +{ + do_db.empty(); + ignore_db.empty(); + rewrite_db.empty(); +} + + +Rpl_filter::~Rpl_filter() +{ + if (do_table_inited) + hash_free(&do_table); + if (ignore_table_inited) + hash_free(&ignore_table); + if (wild_do_table_inited) + free_string_array(&wild_do_table); + if (wild_ignore_table_inited) + free_string_array(&wild_ignore_table); + free_list(&do_db); + free_list(&ignore_db); + free_list(&rewrite_db); +} + + +/* + Returns true if table should be logged/replicated + + SYNOPSIS + tables_ok() + db db to use if db in TABLE_LIST is undefined for a table + tables list of tables to check + + NOTES + Changing table order in the list can lead to different results. + + Note also order of precedence of do/ignore rules (see code). For + that reason, users should not set conflicting rules because they + may get unpredicted results (precedence order is explained in the + manual). + + If no table in the list is marked "updating", then we always + return 0, because there is no reason to execute this statement on + slave if it updates nothing. (Currently, this can only happen if + statement is a multi-delete (SQLCOM_DELETE_MULTI) and "tables" are + the tables in the FROM): + + In the case of SQLCOM_DELETE_MULTI, there will be a second call to + tables_ok(), with tables having "updating==TRUE" (those after the + DELETE), so this second call will make the decision (because + all_tables_not_ok() = !tables_ok(1st_list) && + !tables_ok(2nd_list)). + + TODO + "Include all tables like "abc.%" except "%.EFG"". (Can't be done now.) + If we supported Perl regexps, we could do it with pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::tables_ok(const char* db, TABLE_LIST* tables) +{ + bool some_tables_updating= 0; + DBUG_ENTER("Rpl_filter::tables_ok"); + + for (; tables; tables= tables->next_global) + { + char hash_key[2*NAME_LEN+2]; + char *end; + uint len; + + if (!tables->updating) + continue; + some_tables_updating= 1; + end= strmov(hash_key, tables->db ? tables->db : db); + *end++= '.'; + len= (uint) (strmov(end, tables->table_name) - hash_key); + if (do_table_inited) // if there are any do's + { + if (hash_search(&do_table, (byte*) hash_key, len)) + DBUG_RETURN(1); + } + if (ignore_table_inited) // if there are any ignores + { + if (hash_search(&ignore_table, (byte*) hash_key, len)) + DBUG_RETURN(0); + } + if (wild_do_table_inited && + find_wild(&wild_do_table, hash_key, len)) + DBUG_RETURN(1); + if (wild_ignore_table_inited && + find_wild(&wild_ignore_table, hash_key, len)) + DBUG_RETURN(0); + } + + /* + If no table was to be updated, ignore statement (no reason we play it on + slave, slave is supposed to replicate _changes_ only). 
+ If no explicit rule found and there was a do list, do not replicate. + If there was no do list, go ahead + */ + DBUG_RETURN(some_tables_updating && + !do_table_inited && !wild_do_table_inited); +} + + +/* + Checks whether a db matches some do_db and ignore_db rules + + SYNOPSIS + db_ok() + db name of the db to check + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok(const char* db) +{ + DBUG_ENTER("Rpl_filter::db_ok"); + + if (do_db.is_empty() && ignore_db.is_empty()) + DBUG_RETURN(1); // Ok to replicate if the user puts no constraints + + /* + If the user has specified restrictions on which databases to replicate + and db was not selected, do not replicate. + */ + if (!db) + DBUG_RETURN(0); + + if (!do_db.is_empty()) // if the do's are not empty + { + I_List_iterator<i_string> it(do_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(1); // match + } + DBUG_RETURN(0); + } + else // there are some elements in the don't, otherwise we cannot get here + { + I_List_iterator<i_string> it(ignore_db); + i_string* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->ptr, db)) + DBUG_RETURN(0); // match + } + DBUG_RETURN(1); + } +} + + +/* + Checks whether a db matches wild_do_table and wild_ignore_table + rules (for replication) + + SYNOPSIS + db_ok_with_wild_table() + db name of the db to check. + Is tested with check_db_name() before calling this function. + + NOTES + Here is the reason for this function. + We advise users who want to exclude a database 'db1' safely to do it + with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or + replicate_ignore_db because the two lasts only check for the selected db, + which won't work in that case: + USE db2; + UPDATE db1.t SET ... #this will be replicated and should not + whereas replicate_wild_ignore_table will work in all cases. + With replicate_wild_ignore_table, we only check tables. When + one does 'DROP DATABASE db1', tables are not involved and the + statement will be replicated, while users could expect it would not (as it + rougly means 'DROP db1.first_table, DROP db1.second_table...'). + In other words, we want to interpret 'db1.%' as "everything touching db1". + That is why we want to match 'db1' against 'db1.%' wild table rules. + + RETURN VALUES + 0 should not be logged/replicated + 1 should be logged/replicated +*/ + +bool +Rpl_filter::db_ok_with_wild_table(const char *db) +{ + DBUG_ENTER("Rpl_filter::db_ok_with_wild_table"); + + char hash_key[NAME_LEN+2]; + char *end; + int len; + end= strmov(hash_key, db); + *end++= '.'; + len= end - hash_key ; + if (wild_do_table_inited && find_wild(&wild_do_table, hash_key, len)) + { + DBUG_PRINT("return",("1")); + DBUG_RETURN(1); + } + if (wild_ignore_table_inited && find_wild(&wild_ignore_table, hash_key, len)) + { + DBUG_PRINT("return",("0")); + DBUG_RETURN(0); + } + + /* + If no explicit rule found and there was a do list, do not replicate. 
+ If there was no do list, go ahead + */ + DBUG_PRINT("return",("db=%s,retval=%d", db, !wild_do_table_inited)); + DBUG_RETURN(!wild_do_table_inited); +} + + +bool +Rpl_filter::is_on() +{ + return table_rules_on; +} + + +int +Rpl_filter::add_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_table"); + if (!do_table_inited) + init_table_rule_hash(&do_table, &do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&do_table, table_spec)); +} + + +int +Rpl_filter::add_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_table"); + if (!ignore_table_inited) + init_table_rule_hash(&ignore_table, &ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_table_rule(&ignore_table, table_spec)); +} + + +int +Rpl_filter::add_wild_do_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_do_table"); + if (!wild_do_table_inited) + init_table_rule_array(&wild_do_table, &wild_do_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_do_table, table_spec)); +} + + +int +Rpl_filter::add_wild_ignore_table(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_wild_ignore_table"); + if (!wild_ignore_table_inited) + init_table_rule_array(&wild_ignore_table, &wild_ignore_table_inited); + table_rules_on= 1; + DBUG_RETURN(add_wild_table_rule(&wild_ignore_table, table_spec)); +} + + +void +Rpl_filter::add_db_rewrite(const char* from_db, const char* to_db) +{ + i_string_pair *db_pair = new i_string_pair(from_db, to_db); + rewrite_db.push_back(db_pair); +} + + +int +Rpl_filter::add_table_rule(HASH* h, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + // len is always > 0 because we know the there exists a '.' + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + + return my_hash_insert(h, (byte*)e); +} + + +/* + Add table expression with wildcards to dynamic array +*/ + +int +Rpl_filter::add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) +{ + const char* dot = strchr(table_spec, '.'); + if (!dot) return 1; + uint len = (uint)strlen(table_spec); + TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) + + len, MYF(MY_WME)); + if (!e) return 1; + e->db= (char*)e + sizeof(TABLE_RULE_ENT); + e->tbl_name= e->db + (dot - table_spec) + 1; + e->key_len= len; + memcpy(e->db, table_spec, len); + insert_dynamic(a, (gptr)&e); + return 0; +} + + +void +Rpl_filter::add_do_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_do_db"); + i_string *db = new i_string(table_spec); + do_db.push_back(db); +} + + +void +Rpl_filter::add_ignore_db(const char* table_spec) +{ + DBUG_ENTER("Rpl_filter::add_ignore_db"); + i_string *db = new i_string(table_spec); + ignore_db.push_back(db); +} + + +static byte* get_table_key(const byte* a, uint* len, + my_bool __attribute__((unused))) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + *len= e->key_len; + return (byte*)e->db; +} + + +static void free_table_ent(void* a) +{ + TABLE_RULE_ENT *e= (TABLE_RULE_ENT *) a; + + my_free((gptr) e, MYF(0)); +} + + +void +Rpl_filter::init_table_rule_hash(HASH* h, bool* h_inited) +{ + hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, + get_table_key, free_table_ent, 0); + *h_inited = 1; +} + + +void 
+Rpl_filter::init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) +{ + my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, + TABLE_RULE_ARR_SIZE); + *a_inited = 1; +} + + +TABLE_RULE_ENT* +Rpl_filter::find_wild(DYNAMIC_ARRAY *a, const char* key, int len) +{ + uint i; + const char* key_end= key + len; + + for (i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e ; + get_dynamic(a, (gptr)&e, i); + if (!my_wildcmp(system_charset_info, key, key_end, + (const char*)e->db, + (const char*)(e->db + e->key_len), + '\\',wild_one,wild_many)) + return e; + } + + return 0; +} + + +void +Rpl_filter::free_string_array(DYNAMIC_ARRAY *a) +{ + uint i; + for (i= 0; i < a->elements; i++) + { + char* p; + get_dynamic(a, (gptr) &p, i); + my_free(p, MYF(MY_WME)); + } + delete_dynamic(a); +} + + +/* + Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other + hash, as it assumes that the hash entries are TABLE_RULE_ENT. + + SYNOPSIS + table_rule_ent_hash_to_str() + s pointer to the String to fill + h pointer to the HASH to read + + RETURN VALUES + none +*/ + +void +Rpl_filter::table_rule_ent_hash_to_str(String* s, HASH* h, bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < h->records; i++) + { + TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited) +{ + s->length(0); + if (inited) + { + for (uint i= 0; i < a->elements; i++) + { + TABLE_RULE_ENT* e; + get_dynamic(a, (gptr)&e, i); + if (s->length()) + s->append(','); + s->append(e->db,e->key_len); + } + } +} + + +void +Rpl_filter::get_do_table(String* str) +{ + table_rule_ent_hash_to_str(str, &do_table, do_table_inited); +} + + +void +Rpl_filter::get_ignore_table(String* str) +{ + table_rule_ent_hash_to_str(str, &ignore_table, ignore_table_inited); +} + + +void +Rpl_filter::get_wild_do_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_do_table, wild_do_table_inited); +} + + +void +Rpl_filter::get_wild_ignore_table(String* str) +{ + table_rule_ent_dynamic_array_to_str(str, &wild_ignore_table, wild_ignore_table_inited); +} + + +const char* +Rpl_filter::get_rewrite_db(const char* db, uint32 *new_len) +{ + if (rewrite_db.is_empty() || !db) + return db; + I_List_iterator<i_string_pair> it(rewrite_db); + i_string_pair* tmp; + + while ((tmp=it++)) + { + if (!strcmp(tmp->key, db)) + { + *new_len= strlen(tmp->val); + return tmp->val; + } + } + return db; +} + + +I_List<i_string>* +Rpl_filter::get_do_db() +{ + return &do_db; +} + + +I_List<i_string>* +Rpl_filter::get_ignore_db() +{ + return &ignore_db; +} diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h new file mode 100644 index 00000000000..58d2b97c9c6 --- /dev/null +++ b/sql/rpl_filter.h @@ -0,0 +1,117 @@ +/* Copyright (C) 2000-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef RPL_FILTER_H +#define RPL_FILTER_H + +#include "mysql.h" + +typedef struct st_table_rule_ent +{ + char* db; + char* tbl_name; + uint key_len; +} TABLE_RULE_ENT; + +/* + Rpl_filter + + Inclusion and exclusion rules of tables and databases. + Also handles rewrites of db. + Used for replication and binlogging. + */ +class Rpl_filter +{ +public: + Rpl_filter(); + ~Rpl_filter(); + Rpl_filter(Rpl_filter const&); + Rpl_filter& operator=(Rpl_filter const&); + + /* Checks - returns true if ok to replicate/log */ + + bool tables_ok(const char* db, TABLE_LIST* tables); + bool db_ok(const char* db); + bool db_ok_with_wild_table(const char *db); + + bool is_on(); + + /* Setters - add filtering rules */ + + int add_do_table(const char* table_spec); + int add_ignore_table(const char* table_spec); + + int add_wild_do_table(const char* table_spec); + int add_wild_ignore_table(const char* table_spec); + + void add_do_db(const char* db_spec); + void add_ignore_db(const char* db_spec); + + void add_db_rewrite(const char* from_db, const char* to_db); + + /* Getters - to get information about current rules */ + + void get_do_table(String* str); + void get_ignore_table(String* str); + + void get_wild_do_table(String* str); + void get_wild_ignore_table(String* str); + + const char* get_rewrite_db(const char* db, uint32 *new_len); + + I_List<i_string>* get_do_db(); + I_List<i_string>* get_ignore_db(); + +private: + bool table_rules_on; + + void init_table_rule_hash(HASH* h, bool* h_inited); + void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); + + int add_table_rule(HASH* h, const char* table_spec); + int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); + + void free_string_array(DYNAMIC_ARRAY *a); + + void table_rule_ent_hash_to_str(String* s, HASH* h, bool inited); + void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a, + bool inited); + TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len); + + /* + Those 4 structures below are uninitialized memory unless the + corresponding *_inited variables are "true". + */ + HASH do_table; + HASH ignore_table; + DYNAMIC_ARRAY wild_do_table; + DYNAMIC_ARRAY wild_ignore_table; + + bool do_table_inited; + bool ignore_table_inited; + bool wild_do_table_inited; + bool wild_ignore_table_inited; + + I_List<i_string> do_db; + I_List<i_string> ignore_db; + + I_List<i_string_pair> rewrite_db; +}; + +extern Rpl_filter *rpl_filter; +extern Rpl_filter *binlog_filter; + +#endif // RPL_FILTER_H diff --git a/sql/rpl_injector.cc b/sql/rpl_injector.cc new file mode 100644 index 00000000000..a69dea9a158 --- /dev/null +++ b/sql/rpl_injector.cc @@ -0,0 +1,153 @@ +/* + Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "rpl_injector.h" +#ifdef HAVE_ROW_BASED_REPLICATION + +/* + injector::transaction - member definitions +*/ + +/* inline since it's called below */ +inline +injector::transaction::transaction(MYSQL_LOG *log, THD *thd) + : m_thd(thd) +{ + /* + Default initialization of m_start_pos leaves it as garbage; + we fill it in using the code below. + */ + LOG_INFO log_info; + log->get_current_log(&log_info); + /* !!! binlog_pos does not follow RAII !!! */ + m_start_pos.m_file_name= my_strdup(log_info.log_file_name, MYF(0)); + m_start_pos.m_file_pos= log_info.pos; + + begin_trans(m_thd); +} + +injector::transaction::~transaction() +{ + /* Needed since my_free expects a 'char*' (instead of 'void*'). */ + char* const the_memory= const_cast<char*>(m_start_pos.m_file_name); + + /* + We set the first character to null just to give all the copies of the + start position a (minimal) chance of seeing that the memory is lost. + All assuming my_free does not step over the memory, of course. + */ + *the_memory= '\0'; + + my_free(the_memory, MYF(0)); +} + +int injector::transaction::commit() +{ + DBUG_ENTER("injector::transaction::commit()"); + m_thd->binlog_flush_pending_rows_event(true); + end_trans(m_thd, COMMIT); + DBUG_RETURN(0); +} + + +int injector::transaction::write_row (server_id_type sid, table tbl, + MY_BITMAP const* cols, size_t colcnt, + record_type record) +{ + DBUG_ENTER("injector::transaction::write_row(...)"); + m_thd->set_server_id(sid); + m_thd->binlog_write_row(tbl.get_table(), tbl.is_transactional(), + cols, colcnt, record); + DBUG_RETURN(0); +} + + +int injector::transaction::delete_row(server_id_type sid, table tbl, + MY_BITMAP const* cols, size_t colcnt, + record_type record) +{ + DBUG_ENTER("injector::transaction::delete_row(...)"); + m_thd->set_server_id(sid); + m_thd->binlog_delete_row(tbl.get_table(), tbl.is_transactional(), + cols, colcnt, record); + DBUG_RETURN(0); +} + + +int injector::transaction::update_row(server_id_type sid, table tbl, + MY_BITMAP const* cols, size_t colcnt, + record_type before, record_type after) +{ + DBUG_ENTER("injector::transaction::update_row(...)"); + m_thd->set_server_id(sid); + m_thd->binlog_update_row(tbl.get_table(), tbl.is_transactional(), + cols, colcnt, before, after); + DBUG_RETURN(0); +} + + +injector::transaction::binlog_pos injector::transaction::start_pos() const +{ + return m_start_pos; +} + + +/* + injector - member definitions +*/ + +/* This constructor is called below */ +inline injector::injector() +{ +} + +static injector *s_injector= 0; +injector *injector::instance() +{ + if (s_injector == 0) + s_injector= new injector; + /* "There can be only one [instance]" */ + return s_injector; +} + + + +injector::transaction injector::new_trans(THD *thd) +{ + DBUG_ENTER("injector::new_trans(THD*)"); + /* + Currently, there is no alternative to using 'mysql_bin_log' since that + is hardcoded into the way the handler is using the binary log. + */ + DBUG_RETURN(transaction(&mysql_bin_log, thd)); +} + +void injector::new_trans(THD *thd, injector::transaction *ptr) +{ + DBUG_ENTER("injector::new_trans(THD *, transaction *)"); + /* + Currently, there is no alternative to using 'mysql_bin_log' since that + is hardcoded into the way the handler is using the binary log. 
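+ + The local transaction object below is swapped into *ptr: the caller's + transaction takes over this one's start position and THD, and whatever + *ptr held before is destroyed together with the temporary when it goes + out of scope.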
+ */ + transaction trans(&mysql_bin_log, thd); + ptr->swap(trans); + + DBUG_VOID_RETURN; +} + +#endif diff --git a/sql/rpl_injector.h b/sql/rpl_injector.h new file mode 100644 index 00000000000..32d3fdd1a78 --- /dev/null +++ b/sql/rpl_injector.h @@ -0,0 +1,251 @@ +/* + Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef INJECTOR_H +#define INJECTOR_H + +/* Pull in 'byte', 'my_off_t', and 'uint32' */ +#include <my_global.h> + +#ifdef HAVE_ROW_BASED_REPLICATION +#include <my_bitmap.h> + +/* Forward declarations */ +class handler; +class MYSQL_LOG; +class st_table; + +typedef st_table TABLE; + +/* + Injector to inject rows into the MySQL server. + + The injector class is used to notify the MySQL server of new rows that have + appeared outside of MySQL control. + + The original purpose of this is to allow clusters---which handle replication + inside the cluster through other means---to insert new rows into the binary + log. Note, however, that the injector should be used whenever rows are + altered in any manner that is outside of MySQL server visibility and are + therefore not seen by the MySQL server. + */ +class injector +{ +public: + + /* + Get an instance of the injector. + + DESCRIPTION + The injector is a Singleton, so this static function returns the + available instance of the injector. + + RETURN VALUE + A pointer to the available injector object. + */ + static injector *instance(); + + /* + A transaction where rows can be added. + + DESCRIPTION + The transaction class satisfies the **CopyConstructible** and + **Assignable** requirements. Note that the transaction is *not* + default constructible. + */ + class transaction { + friend class injector; + public: + /* Convenience definitions */ + typedef byte* record_type; + typedef uint32 server_id_type; + + /* + Table reference. + + RESPONSIBILITY + + The class contains constructors to handle several forms of + references to tables. The constructors can implicitly be used to + construct references from, e.g., strings containing table names. + + EXAMPLE + + The class is intended to be used *by value*. Please, do not try to + construct objects of this type using 'new'; instead construct an + object, possibly a temporary object. For example: + + injector::transaction::table tbl(share->table, true); + MY_BITMAP cols; + bitmap_init(&cols, NULL, (i + 7) / 8, false); + inj->write_row(::server_id, tbl, &cols, row_data); + + or + + MY_BITMAP cols; + bitmap_init(&cols, NULL, (i + 7) / 8, false); + inj->write_row(::server_id, + injector::transaction::table(share->table, true), + &cols, row_data); + + This will work, be more efficient, have a greater chance of + inlining, and not run the risk of losing pointers. + + COLLABORATION + + injector::transaction + Provides a flexible interface to the representation of tables. 
+ + */ + class table + { + public: + table(TABLE *table, bool is_transactional) + : m_table(table), m_is_transactional(is_transactional) + { + } + + char const *db_name() const { return m_table->s->db.str; } + char const *table_name() const { return m_table->s->table_name.str; } + TABLE *get_table() const { return m_table; } + bool is_transactional() const { return m_is_transactional; } + + private: + TABLE *m_table; + bool m_is_transactional; + }; + + /* + Binlog position as a structure. + */ + class binlog_pos { + friend class transaction; + public: + char const *file_name() const { return m_file_name; } + my_off_t file_pos() const { return m_file_pos; } + + private: + char const *m_file_name; + my_off_t m_file_pos; + }; + + transaction() : m_thd(NULL) { } + transaction(transaction const&); + ~transaction(); + + /* Clear transaction, i.e., make calls to 'good()' return false. */ + void clear() { m_thd= NULL; } + + /* Is the transaction in a good state? */ + bool good() const { return m_thd != NULL; } + + /* Default assignment operator: standard implementation */ + transaction& operator=(transaction t) { + swap(t); + return *this; + } + + /* + Add a 'write row' entry to the transaction. + */ + int write_row (server_id_type sid, table tbl, + MY_BITMAP const *cols, size_t colcnt, + record_type record); + + /* + Add a 'delete row' entry to the transaction. + */ + int delete_row(server_id_type sid, table tbl, + MY_BITMAP const *cols, size_t colcnt, + record_type record); + + /* + Add an 'update row' entry to the transaction. + */ + int update_row(server_id_type sid, table tbl, + MY_BITMAP const *cols, size_t colcnt, + record_type before, record_type after); + + /* + Commit a transaction. + + This member function will clean up after a sequence of *_row calls by, + for example, releasing resources and unlocking files. + */ + int commit(); + + /* + Get the position for the start of the transaction. + + Returns the position in the binary log of the first event in this + transaction. If no event is yet written, the position where the event + *will* be written is returned. This position is known, since + new_trans() will lock the binary log and prevent any other + writes to the binary log. + */ + binlog_pos start_pos() const; + + private: + /* Only the injector may construct these objects */ + transaction(MYSQL_LOG *, THD *); + + void swap(transaction& o) { + /* std::swap(m_start_pos, o.m_start_pos); */ + { + binlog_pos const tmp= m_start_pos; + m_start_pos= o.m_start_pos; + o.m_start_pos= tmp; + } + + /* std::swap(m_thd, o.m_thd); */ + { + THD* const tmp= m_thd; + m_thd= o.m_thd; + o.m_thd= tmp; + } + } + + binlog_pos m_start_pos; + THD *m_thd; + }; + + /* + Create a new transaction. This member function will prepare for a + sequence of *_row calls by, for example, reserving resources and + locking files. There are two overloaded alternatives: one returning a + transaction by value and one using placement semantics. The following + two calls are equivalent, with the exception that the latter will + overwrite the transaction. + + injector::transaction trans1= inj->new_trans(thd); + + injector::transaction trans2; + inj->new_trans(thd, &trans2); + */ + transaction new_trans(THD *); + void new_trans(THD *, transaction *); + +private: + explicit injector(); + ~injector() { } /* Nothing needs to be done */ + injector(injector const&); /* You're not allowed to copy injector + instances. 
+ */ +}; + +#endif /* HAVE_ROW_BASED_REPLICATION */ +#endif /* INJECTOR_H */ diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h new file mode 100644 index 00000000000..5500fdf1f64 --- /dev/null +++ b/sql/rpl_rli.h @@ -0,0 +1,312 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef RPL_RLI_H +#define RPL_RLI_H + +#define MAX_SLAVE_ERRMSG 1024 + +#include "rpl_tblmap.h" + +/**************************************************************************** + + Replication SQL Thread + + st_relay_log_info contains: + - the current relay log + - the current relay log offset + - master log name + - master log sequence corresponding to the last update + - misc information specific to the SQL thread + + st_relay_log_info is initialized from the slave.info file if it exists. + Otherwise, data members are initialized with defaults. The initialization is + done with the init_relay_log_info() call. + + The format of the slave.info file: + + relay_log_name + relay_log_pos + master_log_name + master_log_pos + + To clean up, call end_relay_log_info() + +*****************************************************************************/ + +typedef struct st_relay_log_info +{ + /* + If flag set, then rli does not store its state in any info file. + This is the case only when we execute BINLOG SQL commands inside + a client, non-replication thread. + */ + bool no_storage; + + /*** The following variables can only be read when protected by the data lock ****/ + + /* + info_fd - file descriptor of the info file. Set only during + initialization or cleanup; safe to read anytime + cur_log_fd - file descriptor of the current read relay log + */ + File info_fd,cur_log_fd; + + /* + Protected with internal locks. + Must get data_lock when resetting the logs. + */ + MYSQL_LOG relay_log; + LOG_INFO linfo; + IO_CACHE cache_buf,*cur_log; + + /* The following variables are safe to read any time */ + + /* IO_CACHE of the info file - set only during init or end */ + IO_CACHE info_file; + + /* + When we restart the slave thread we need to have access to the previously + created temporary tables. Modified only on init/end and by the SQL + thread, read only by the SQL thread. 
+ */ + TABLE *save_temporary_tables; + + /* + standard lock acquisition order to avoid deadlocks: + run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index + */ + pthread_mutex_t data_lock,run_lock; + + /* + start_cond is broadcast when SQL thread is started + stop_cond - when stopped + data_cond - when data protected by data_lock changes + */ + pthread_cond_t start_cond, stop_cond, data_cond; + + /* parent master info structure */ + struct st_master_info *mi; + + /* + Needed to deal properly with cur_log getting closed and re-opened with + a different log under our feet + */ + uint32 cur_log_old_open_count; + + /* + Let's call a group (of events): + - a transaction + or + - an autocommitting query + its associated events (INSERT_ID, + TIMESTAMP...) + We need these rli coordinates: + - relay log name and position of the beginning of the group we currently are + executing. Needed to know where we have to restart when replication has + stopped in the middle of a group (which has been rolled back by the slave). + - relay log name and position just after the event we have just + executed. This event is part of the current group. + Formerly we only had the immediately above coordinates, plus a 'pending' + variable, but this dealt badly with the case of a transaction starting on a + relay log and finishing (committing) on another relay log. This case can + happen when, for example, the relay log gets rotated because of + max_binlog_size. + */ + char group_relay_log_name[FN_REFLEN]; + ulonglong group_relay_log_pos; + char event_relay_log_name[FN_REFLEN]; + ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + + /* + Original log name and position of the group we're currently executing + (whose coordinates are group_relay_log_name/pos in the relay log) + in the master's binlog. These concern the *group*, because in the master's + binlog the log_pos that comes with each event is the position of the + beginning of the group. + */ + char group_master_log_name[FN_REFLEN]; + volatile my_off_t group_master_log_pos; + + /* + Handling of the relay_log_space_limit optional constraint. + ignore_log_space_limit is used to resolve a deadlock between I/O and SQL + threads; the SQL thread sets it to unblock the I/O thread and make it + temporarily forget about the constraint. + */ + ulonglong log_space_limit,log_space_total; + bool ignore_log_space_limit; + + /* + When it commits, InnoDB internally stores the master log position it has + processed so far; the position to store is the one of the end of the + committing event (the COMMIT query event, or the event if in autocommit + mode). + */ +#if MYSQL_VERSION_ID < 40100 + ulonglong future_master_log_pos; +#else + ulonglong future_group_master_log_pos; +#endif + + time_t last_master_timestamp; + + /* + Needed for problems when the slave stops and we want to restart it + skipping one or more events in the master log that have caused + errors, and have been manually applied by the DBA already. 
+ */ + volatile uint32 slave_skip_counter; + volatile ulong abort_pos_wait; /* Incremented on change master */ + volatile ulong slave_run_id; /* Incremented on slave start */ + pthread_mutex_t log_space_lock; + pthread_cond_t log_space_cond; + THD * sql_thd; + int last_slave_errno; +#ifndef DBUG_OFF + int events_till_abort; +#endif + char last_slave_error[MAX_SLAVE_ERRMSG]; + + /* if not set, the values of the other members of the structure are undefined */ + bool inited; + volatile bool abort_slave; + volatile uint slave_running; + + /* + Condition and its parameters from START SLAVE UNTIL clause. + + The UNTIL condition is tested with the is_until_satisfied() method that is + called by exec_relay_log_event(). is_until_satisfied() caches the result + of the comparison of log names because log names don't change very often; + this cache is invalidated by parts of code which change log names with + notify_*_log_name_updated() methods. (They need to be called only if the + SQL thread is running.) + */ + + enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition; + char until_log_name[FN_REFLEN]; + ulonglong until_log_pos; + /* extension extracted from log_name and converted to int */ + ulong until_log_name_extension; + /* + Cached result of comparison of until_log_name and current log name: + -2 means uninitialized; -1, 0, 1 are comparison results + */ + enum + { + UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, + UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 + } until_log_names_cmp_result; + + char cached_charset[6]; + /* + trans_retries varies between 0 and slave_transaction_retries and counts how + many times the slave has retried the present transaction; gets reset to 0 + when the transaction finally succeeds. retried_trans is a cumulative + counter: how many times the slave has retried a transaction (any) since + the slave started. + */ + ulong trans_retries, retried_trans; + + /* + If the end of the hot relay log is made of master's events ignored by the + slave I/O thread, these two keep track of the coords (in the master's + binlog) of the last of these events seen by the slave I/O thread. If not, + ign_master_log_name_end[0] == 0. + As they are like a Rotate event read/written from/to the relay log, they + are both protected by rli->relay_log.LOCK_log. + */ + char ign_master_log_name_end[FN_REFLEN]; + ulonglong ign_master_log_pos_end; + + st_relay_log_info(); + ~st_relay_log_info(); + + /* + Invalidate cached until_log_name and group_relay_log_name comparison + result. Should be called after any update of group_relay_log_name if + there is a chance that the sql_thread is running. + */ + inline void notify_group_relay_log_name_update() + { + if (until_condition==UNTIL_RELAY_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + /* + The same as above, but for group_master_log_name. + */ + inline void notify_group_master_log_name_update() + { + if (until_condition==UNTIL_MASTER_POS) + until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; + } + + inline void inc_event_relay_log_pos() + { + event_relay_log_pos= future_event_relay_log_pos; + } + + void inc_group_relay_log_pos(ulonglong log_pos, + bool skip_lock=0); + + int wait_for_pos(THD* thd, String* log_name, longlong log_pos, + longlong timeout); + void close_temporary_tables(); + + /* Check if UNTIL condition is satisfied. See slave.cc for more. */ + bool is_until_satisfied(); + inline ulonglong until_pos() + { + return ((until_condition == UNTIL_MASTER_POS) ? 
group_master_log_pos : + group_relay_log_pos); + } + + table_mapping m_table_map; + + /* + Last charset (6 bytes) seen by slave SQL thread is cached here; it helps + the thread save 3 get_charset() calls per Query_log_event if the charset is + not changing from event to event (common situation). + Six zero bytes are used to mean "cache is invalidated". + */ + void cached_charset_invalidate(); + bool cached_charset_compare(char *charset); + + /* + To reload special tables when they are changed, we introduce a set + of functions that will mark whenever special functions need to be + called after modifying tables. Right now, the tables are either + ACL tables or grant tables. + */ + enum enum_reload_flag + { + RELOAD_NONE_F = 0UL, + RELOAD_GRANT_F = (1UL << 0), + RELOAD_ACCESS_F = (1UL << 1) + }; + + ulong m_reload_flags; + + void touching_table(char const* db, char const* table, ulong table_id); + void transaction_end(THD*); + + void cleanup_context(THD *, bool); + time_t unsafe_to_stop_at; +} RELAY_LOG_INFO; + +#endif /* RPL_RLI_H */ diff --git a/sql/rpl_tblmap.cc b/sql/rpl_tblmap.cc new file mode 100644 index 00000000000..a0272b23ee8 --- /dev/null +++ b/sql/rpl_tblmap.cc @@ -0,0 +1,151 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" + +#ifdef HAVE_REPLICATION + +#include "rpl_tblmap.h" + +#define MAYBE_TABLE_NAME(T) ((T) ? (T)->s->table_name.str : "<>") +#define TABLE_ID_HASH_SIZE 32 +#define TABLE_ID_CHUNK 256 + +table_mapping::table_mapping() + : m_free(0) +{ + /* + No "free_element" function for entries passed here, as the entries are + allocated in a MEM_ROOT (freed as a whole in the destructor); they cannot + be freed one by one. + Note that below we don't test if hash_init() succeeded. This constructor + is called at startup only. + */ + (void) hash_init(&m_table_ids,&my_charset_bin,TABLE_ID_HASH_SIZE, + offsetof(entry,table_id),sizeof(ulong), + 0,0,0); + /* We don't preallocate any block; this is consistent with m_free=0 above */ + init_alloc_root(&m_mem_root, TABLE_ID_HASH_SIZE*sizeof(entry), 0); +} + +table_mapping::~table_mapping() +{ + hash_free(&m_table_ids); + free_root(&m_mem_root, MYF(0)); +} + +st_table* table_mapping::get_table(ulong table_id) +{ + DBUG_ENTER("table_mapping::get_table(ulong)"); + DBUG_PRINT("enter", ("table_id=%d", table_id)); + entry *e= find_entry(table_id); + if (e) + { + DBUG_PRINT("info", ("tid %d -> table %p (%s)", + table_id, e->table, + MAYBE_TABLE_NAME(e->table))); + DBUG_RETURN(e->table); + } + + DBUG_PRINT("info", ("tid %d is not mapped!", table_id)); + DBUG_RETURN(NULL); +} + +/* + Called when we are out of table id entries. Creates TABLE_ID_CHUNK + new entries, chains them, and attaches them at the head of the list of free + (free for use) entries. 
+*/ +int table_mapping::expand() +{ + /* + If we wanted to use "tmp= new (&m_mem_root) entry[TABLE_ID_CHUNK]", + we would have to make "entry" derive from Sql_alloc but then it would not + be a POD anymore and we want it to be (see rpl_tblmap.h). So we allocate + in C. + */ + entry *tmp= (entry *)alloc_root(&m_mem_root, TABLE_ID_CHUNK*sizeof(entry)); + if (tmp == NULL) + return ERR_MEMORY_ALLOCATION; // Memory allocation failed + + /* Find the end of this fresh new array of free entries */ + entry *e_end= tmp+TABLE_ID_CHUNK-1; + for (entry *e= tmp; e < e_end; e++) + e->next= e+1; + e_end->next= m_free; + m_free= tmp; + return 0; +} + +int table_mapping::set_table(ulong table_id, TABLE* table) +{ + DBUG_ENTER("table_mapping::set_table(ulong,TABLE*)"); + DBUG_PRINT("enter", ("table_id=%d, table=%p (%s)", + table_id, + table, MAYBE_TABLE_NAME(table))); + entry *e= find_entry(table_id); + if (e == 0) + { + if (m_free == 0 && expand()) + DBUG_RETURN(ERR_MEMORY_ALLOCATION); // Memory allocation failed + e= m_free; + m_free= m_free->next; + } + else + hash_delete(&m_table_ids,(byte *)e); + + e->table_id= table_id; + e->table= table; + my_hash_insert(&m_table_ids,(byte *)e); + + DBUG_PRINT("info", ("tid %d -> table %p (%s)", + table_id, e->table, + MAYBE_TABLE_NAME(e->table))); + DBUG_RETURN(0); // All OK +} + +int table_mapping::remove_table(ulong table_id) +{ + entry *e= find_entry(table_id); + if (e) + { + hash_delete(&m_table_ids,(byte *)e); + /* we add this entry to the chain of free (free for use) entries */ + e->next= m_free; + m_free= e; + return 0; // All OK + } + return 1; // No table to remove +} + +/* + Puts all entries into the list of free-for-use entries (does not free any + memory), and empties the hash. +*/ +void table_mapping::clear_tables() +{ + DBUG_ENTER("table_mapping::clear_tables()"); + for (uint i= 0; i < m_table_ids.records; i++) + { + entry *e= (entry *)hash_element(&m_table_ids, i); + e->next= m_free; + m_free= e; + } + my_hash_reset(&m_table_ids); + DBUG_VOID_RETURN; +} + +#endif diff --git a/sql/rpl_tblmap.h b/sql/rpl_tblmap.h new file mode 100644 index 00000000000..23864bd329e --- /dev/null +++ b/sql/rpl_tblmap.h @@ -0,0 +1,105 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef TABLE_MAPPING_H +#define TABLE_MAPPING_H + +/* Forward declarations */ +struct st_table; +typedef st_table TABLE; + +/* + CLASS table_mapping + + RESPONSIBILITIES + The table mapping is used to map table ids to table pointers + + COLLABORATION + RELAY_LOG For mapping table ids to tables when receiving events. + */ + +/* + Guilhem to Mats: + in the table_mapping class, the memory is allocated and never freed (until + destruction). So this is a good candidate for allocating inside a MEM_ROOT: + it gives efficient allocation in chunks (like in expand()). So I have + introduced a MEM_ROOT. 
+ + Note that inheriting from Sql_alloc had no effect: it has effects only when + "ptr= new table_mapping" is called, and this is never called. And it would + then allocate from thd->mem_root which is a highly volatile object (reset + for example after executing each query; see dispatch_command(), which has a + free_root() at the end); as the table_mapping object is supposed to live + longer than a query, this was dangerous. + A dedicated MEM_ROOT needs to be used, see below. +*/ + +class table_mapping { + +private: + MEM_ROOT m_mem_root; + +public: + + enum enum_error { + ERR_NO_ERROR = 0, + ERR_LIMIT_EXCEEDED, + ERR_MEMORY_ALLOCATION + }; + + table_mapping(); + ~table_mapping(); + + TABLE* get_table(ulong table_id); + + int set_table(ulong table_id, TABLE* table); + int remove_table(ulong table_id); + void clear_tables(); + ulong count() const { return m_table_ids.records; } + +private: + /* + This is a POD (Plain Old Data). Keep it that way (we apply offsetof() to + it, which only works for PODs) + */ + struct entry { + ulong table_id; + union { + TABLE *table; + entry *next; + }; + }; + + entry *find_entry(ulong table_id) + { + return (entry *)hash_search(&m_table_ids, + (byte*)&table_id, + sizeof(table_id)); + } + int expand(); + + /* + Head of the list of free entries; "free" in the sense that it's an + allocated entry free for use, NOT in the sense that it's freed + memory. + */ + entry *m_free; + + /* Correspondence between an id (a number) and a TABLE object */ + HASH m_table_ids; +}; + +#endif diff --git a/sql/set_var.cc b/sql/set_var.cc index 7be79ab59f0..bf3fdd8c8d7 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -58,16 +58,59 @@ #include <my_getopt.h> #include <thr_alarm.h> #include <myisam.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" + +/* WITH_BERKELEY_STORAGE_ENGINE */ +extern bool berkeley_shared_data; +extern ulong berkeley_max_lock, berkeley_log_buffer_size; +extern ulonglong berkeley_cache_size; +extern ulong berkeley_region_size, berkeley_cache_parts; +extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; + +/* WITH_INNOBASE_STORAGE_ENGINE */ +extern uint innobase_flush_log_at_trx_commit; +extern ulong innobase_fast_shutdown; +extern long innobase_mirrored_log_groups, innobase_log_files_in_group; +extern longlong innobase_log_file_size; +extern long innobase_log_buffer_size; +extern longlong innobase_buffer_pool_size; +extern long innobase_additional_mem_pool_size; +extern long innobase_buffer_pool_awe_mem_mb; +extern long innobase_file_io_threads, innobase_lock_wait_timeout; +extern long innobase_force_recovery; +extern long innobase_open_files; +extern char *innobase_data_home_dir, *innobase_data_file_path; +extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; +extern char *innobase_unix_file_flush_method; +/* The following variables have to be my_bool for SHOW VARIABLES to work */ +extern my_bool innobase_log_archive, + innobase_use_doublewrite, + innobase_use_checksums, + innobase_file_per_table, + innobase_locks_unsafe_for_binlog; + +extern "C" { +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; +extern ulong srv_auto_extend_increment; +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; +} + +/* WITH_NDBCLUSTER_STORAGE_ENGINE */ +extern ulong 
ndb_cache_check_time; +extern ulong ndb_extra_logging; +#ifdef HAVE_NDB_BINLOG +extern ulong ndb_report_thresh_binlog_epoch_slip; +extern ulong ndb_report_thresh_binlog_mem_usage; #endif + + +extern my_bool event_executor_running_global_var; + static HASH system_variable_hash; const char *bool_type_names[]= { "OFF", "ON", NullS }; TYPELIB bool_typelib= @@ -98,7 +141,9 @@ static int check_log_update(THD *thd, set_var *var); static bool set_log_update(THD *thd, set_var *var); static int check_pseudo_thread_id(THD *thd, set_var *var); static bool set_log_bin(THD *thd, set_var *var); +void fix_binlog_format_after_update(THD *thd, enum_var_type type); static void fix_low_priority_updates(THD *thd, enum_var_type type); +static int check_tx_isolation(THD *thd, set_var *var); static void fix_tx_isolation(THD *thd, enum_var_type type); static int check_completion_type(THD *thd, set_var *var); static void fix_completion_type(THD *thd, enum_var_type type); @@ -120,7 +165,6 @@ static KEY_CACHE *create_key_cache(const char *name, uint length); void fix_sql_mode_var(THD *thd, enum_var_type type); static byte *get_error_count(THD *thd); static byte *get_warning_count(THD *thd); -static byte *get_have_innodb(THD *thd); /* Variable definition list @@ -129,6 +173,9 @@ static byte *get_have_innodb(THD *thd); alphabetic order */ +sys_var *sys_var::first= NULL; +uint sys_var::sys_vars= 0; + sys_var_thd_ulong sys_auto_increment_increment("auto_increment_increment", &SV::auto_increment_increment); sys_var_thd_ulong sys_auto_increment_offset("auto_increment_offset", @@ -139,6 +186,8 @@ sys_var_bool_ptr sys_automatic_sp_privileges("automatic_sp_privileges", sys_var_long_ptr sys_binlog_cache_size("binlog_cache_size", &binlog_cache_size); +sys_var_thd_binlog_format sys_binlog_format("binlog_format", + &SV::binlog_format); sys_var_thd_ulong sys_bulk_insert_buff_size("bulk_insert_buffer_size", &SV::bulk_insert_buff_size); sys_var_character_set_server sys_character_set_server("character_set_server"); @@ -160,6 +209,9 @@ sys_var_long_ptr sys_concurrent_insert("concurrent_insert", &myisam_concurrent_insert); sys_var_long_ptr sys_connect_timeout("connect_timeout", &connect_timeout); +#ifndef DBUG_OFF +sys_var_thd_dbug sys_dbug("debug"); +#endif sys_var_enum sys_delay_key_write("delay_key_write", &delay_key_write_options, &delay_key_write_typelib, @@ -170,6 +222,9 @@ sys_var_long_ptr sys_delayed_insert_timeout("delayed_insert_timeout", &delayed_insert_timeout); sys_var_long_ptr sys_delayed_queue_size("delayed_queue_size", &delayed_queue_size); +sys_var_event_executor sys_event_executor("event_scheduler", + (my_bool *) + &event_executor_running_global_var); sys_var_long_ptr sys_expire_logs_days("expire_logs_days", &expire_logs_days); sys_var_bool_ptr sys_flush("flush", &myisam_flush); @@ -276,6 +331,8 @@ sys_var_long_ptr sys_myisam_data_pointer_size("myisam_data_pointer_size", sys_var_thd_ulonglong sys_myisam_max_sort_file_size("myisam_max_sort_file_size", &SV::myisam_max_sort_file_size, fix_myisam_max_sort_file_size, 1); sys_var_thd_ulong sys_myisam_repair_threads("myisam_repair_threads", &SV::myisam_repair_threads); sys_var_thd_ulong sys_myisam_sort_buffer_size("myisam_sort_buffer_size", &SV::myisam_sort_buff_size); +sys_var_bool_ptr sys_myisam_use_mmap("myisam_use_mmap", + &opt_myisam_use_mmap); sys_var_thd_enum sys_myisam_stats_method("myisam_stats_method", &SV::myisam_stats_method, @@ -294,6 +351,8 @@ sys_var_thd_ulong sys_net_retry_count("net_retry_count", &SV::net_retry_count, 0, fix_net_retry_count); 
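+/* + A sketch of the registration mechanism behind the definitions above: each + sys_var constructed here links itself into the intrusive list headed by + sys_var::first and counted by sys_var::sys_vars, which set_var_init() + below walks to size and fill system_variable_hash. The member names are + the real ones; the constructor body is an assumption for illustration: + + sys_var(const char *name_arg) : name(name_arg) + { + next= first; // prepend to the global list of variables + first= this; + sys_vars++; // lets set_var_init() size the hash up front + } +*/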
sys_var_thd_bool sys_new_mode("new", &SV::new_mode); +sys_var_thd_bool sys_old_alter_table("old_alter_table", + &SV::old_alter_table); sys_var_thd_bool sys_old_passwords("old_passwords", &SV::old_passwords); sys_var_thd_ulong sys_optimizer_prune_level("optimizer_prune_level", &SV::optimizer_prune_level); @@ -383,7 +442,9 @@ sys_var_thd_ulong sys_sync_replication_timeout( &SV::sync_replication_timeout); #endif sys_var_bool_ptr sys_sync_frm("sync_frm", &opt_sync_frm); -sys_var_long_ptr sys_table_cache_size("table_cache", +sys_var_long_ptr sys_table_def_size("table_definition_cache", + &table_def_size); +sys_var_long_ptr sys_table_cache_size("table_open_cache", &table_cache_size); sys_var_long_ptr sys_table_lock_wait_timeout("table_lock_wait_timeout", &table_lock_wait_timeout); @@ -392,7 +453,8 @@ sys_var_long_ptr sys_thread_cache_size("thread_cache_size", sys_var_thd_enum sys_tx_isolation("tx_isolation", &SV::tx_isolation, &tx_isolation_typelib, - fix_tx_isolation); + fix_tx_isolation, + check_tx_isolation); sys_var_thd_ulong sys_tmp_table_size("tmp_table_size", &SV::tmp_table_size); sys_var_bool_ptr sys_timed_mutexes("timed_mutexes", @@ -400,7 +462,6 @@ sys_var_bool_ptr sys_timed_mutexes("timed_mutexes", sys_var_thd_ulong sys_net_wait_timeout("wait_timeout", &SV::net_wait_timeout); -#ifdef HAVE_INNOBASE_DB sys_var_long_ptr sys_innodb_fast_shutdown("innodb_fast_shutdown", &innobase_fast_shutdown); sys_var_long_ptr sys_innodb_max_dirty_pages_pct("innodb_max_dirty_pages_pct", @@ -423,30 +484,43 @@ sys_var_long_ptr sys_innodb_thread_concurrency("innodb_thread_concurrency", &srv_thread_concurrency); sys_var_long_ptr sys_innodb_commit_concurrency("innodb_commit_concurrency", &srv_commit_concurrency); -sys_var_long_ptr sys_innodb_flush_log_at_trx_commit( - "innodb_flush_log_at_trx_commit", - &srv_flush_log_at_trx_commit); -#endif /* Condition pushdown to storage engine */ sys_var_thd_bool sys_engine_condition_pushdown("engine_condition_pushdown", &SV::engine_condition_pushdown); -#ifdef HAVE_NDBCLUSTER_DB /* ndb thread specific variable settings */ sys_var_thd_ulong sys_ndb_autoincrement_prefetch_sz("ndb_autoincrement_prefetch_sz", &SV::ndb_autoincrement_prefetch_sz); sys_var_thd_bool sys_ndb_force_send("ndb_force_send", &SV::ndb_force_send); +#ifdef HAVE_NDB_BINLOG +sys_var_long_ptr +sys_ndb_report_thresh_binlog_epoch_slip("ndb_report_thresh_binlog_epoch_slip", + &ndb_report_thresh_binlog_epoch_slip); +sys_var_long_ptr +sys_ndb_report_thresh_binlog_mem_usage("ndb_report_thresh_binlog_mem_usage", + &ndb_report_thresh_binlog_mem_usage); +#endif sys_var_thd_bool sys_ndb_use_exact_count("ndb_use_exact_count", &SV::ndb_use_exact_count); sys_var_thd_bool sys_ndb_use_transactions("ndb_use_transactions", &SV::ndb_use_transactions); sys_var_long_ptr sys_ndb_cache_check_time("ndb_cache_check_time", &ndb_cache_check_time); -#endif +sys_var_thd_bool +sys_ndb_index_stat_enable("ndb_index_stat_enable", + &SV::ndb_index_stat_enable); +sys_var_thd_ulong +sys_ndb_index_stat_cache_entries("ndb_index_stat_cache_entries", + &SV::ndb_index_stat_cache_entries); +sys_var_thd_ulong +sys_ndb_index_stat_update_freq("ndb_index_stat_update_freq", + &SV::ndb_index_stat_update_freq); +sys_var_long_ptr +sys_ndb_extra_logging("ndb_extra_logging", &ndb_extra_logging); /* Time/date/datetime formats */ @@ -550,225 +624,92 @@ sys_var_thd_time_zone sys_time_zone("time_zone"); /* Read only variables */ sys_var_const_str sys_os("version_compile_os", SYSTEM_TYPE); -sys_var_readonly sys_have_innodb("have_innodb", OPT_GLOBAL, - 
SHOW_CHAR, get_have_innodb); +sys_var_have_variable sys_have_archive_db("have_archive", &have_archive_db); +sys_var_have_variable sys_have_berkeley_db("have_bdb", &have_berkeley_db); +sys_var_have_variable sys_have_blackhole_db("have_blackhole_engine", + &have_blackhole_db); +sys_var_have_variable sys_have_compress("have_compress", &have_compress); +sys_var_have_variable sys_have_crypt("have_crypt", &have_crypt); +sys_var_have_variable sys_have_csv_db("have_csv", &have_csv_db); +sys_var_have_variable sys_have_example_db("have_example_engine", + &have_example_db); +sys_var_have_variable sys_have_federated_db("have_federated_engine", + &have_federated_db); +sys_var_have_variable sys_have_geometry("have_geometry", &have_geometry); +sys_var_have_variable sys_have_innodb("have_innodb", &have_innodb); +sys_var_have_variable sys_have_ndbcluster("have_ndbcluster", &have_ndbcluster); +sys_var_have_variable sys_have_openssl("have_openssl", &have_openssl); +sys_var_have_variable sys_have_partition_db("have_partitioning", + &have_partition_db); +sys_var_have_variable sys_have_query_cache("have_query_cache", + &have_query_cache); +sys_var_have_variable sys_have_rtree_keys("have_rtree_keys", &have_rtree_keys); +sys_var_have_variable sys_have_symlink("have_symlink", &have_symlink); +sys_var_have_variable sys_have_row_based_replication("have_row_based_replication",&have_row_based_replication); /* Global read-only variable describing server license */ sys_var_const_str sys_license("license", STRINGIFY_ARG(LICENSE)); - -/* - List of all variables for initialisation and storage in hash - This is sorted in alphabetical order to make it easy to add new variables - - If the variable is not in this list, it can't be changed with - SET variable_name= -*/ - -sys_var *sys_variables[]= -{ - &sys_auto_is_null, - &sys_auto_increment_increment, - &sys_auto_increment_offset, - &sys_autocommit, - &sys_automatic_sp_privileges, - &sys_big_tables, - &sys_big_selects, - &sys_binlog_cache_size, - &sys_buffer_results, - &sys_bulk_insert_buff_size, - &sys_character_set_server, - &sys_character_set_database, - &sys_character_set_client, - &sys_character_set_connection, - &sys_character_set_results, - &sys_character_set_filesystem, - &sys_charset_system, - &sys_collation_connection, - &sys_collation_database, - &sys_collation_server, - &sys_completion_type, - &sys_concurrent_insert, - &sys_connect_timeout, - &sys_date_format, - &sys_datetime_format, - &sys_div_precincrement, - &sys_default_week_format, - &sys_delay_key_write, - &sys_delayed_insert_limit, - &sys_delayed_insert_timeout, - &sys_delayed_queue_size, - &sys_error_count, - &sys_expire_logs_days, - &sys_flush, - &sys_flush_time, - &sys_ft_boolean_syntax, - &sys_foreign_key_checks, - &sys_group_concat_max_len, - &sys_have_innodb, - &sys_identity, - &sys_init_connect, - &sys_init_slave, - &sys_insert_id, - &sys_interactive_timeout, - &sys_join_buffer_size, - &sys_key_buffer_size, - &sys_key_cache_block_size, - &sys_key_cache_division_limit, - &sys_key_cache_age_threshold, - &sys_last_insert_id, - &sys_license, - &sys_local_infile, - &sys_log_binlog, - &sys_log_off, - &sys_log_update, - &sys_log_warnings, - &sys_long_query_time, - &sys_low_priority_updates, - &sys_max_allowed_packet, - &sys_max_binlog_cache_size, - &sys_max_binlog_size, - &sys_max_connect_errors, - &sys_max_connections, - &sys_max_delayed_threads, - &sys_max_error_count, - &sys_max_insert_delayed_threads, - &sys_max_heap_table_size, - &sys_max_join_size, - &sys_max_length_for_sort_data, - 
&sys_max_relay_log_size, - &sys_max_seeks_for_key, - &sys_max_sort_length, - &sys_max_sp_recursion_depth, - &sys_max_tmp_tables, - &sys_max_user_connections, - &sys_max_write_lock_count, - &sys_multi_range_count, - &sys_myisam_data_pointer_size, - &sys_myisam_max_sort_file_size, - &sys_myisam_repair_threads, - &sys_myisam_sort_buffer_size, - &sys_myisam_stats_method, - &sys_net_buffer_length, - &sys_net_read_timeout, - &sys_net_retry_count, - &sys_net_wait_timeout, - &sys_net_write_timeout, - &sys_new_mode, - &sys_old_passwords, - &sys_optimizer_prune_level, - &sys_optimizer_search_depth, - &sys_preload_buff_size, - &sys_pseudo_thread_id, - &sys_query_alloc_block_size, - &sys_query_cache_size, - &sys_query_prealloc_size, -#ifdef HAVE_QUERY_CACHE - &sys_query_cache_limit, - &sys_query_cache_min_res_unit, - &sys_query_cache_type, - &sys_query_cache_wlock_invalidate, -#endif /* HAVE_QUERY_CACHE */ - &sys_quote_show_create, - &sys_rand_seed1, - &sys_rand_seed2, - &sys_range_alloc_block_size, - &sys_readonly, - &sys_read_buff_size, - &sys_read_rnd_buff_size, -#ifdef HAVE_REPLICATION - &sys_relay_log_purge, -#endif - &sys_rpl_recovery_rank, - &sys_safe_updates, - &sys_secure_auth, - &sys_select_limit, - &sys_server_id, #ifdef HAVE_REPLICATION - &sys_slave_compressed_protocol, - &sys_slave_net_timeout, - &sys_slave_trans_retries, - &sys_slave_skip_counter, -#endif - &sys_slow_launch_time, - &sys_sort_buffer, - &sys_sql_big_tables, - &sys_sql_low_priority_updates, - &sys_sql_max_join_size, - &sys_sql_mode, - &sys_sql_warnings, - &sys_sql_notes, - &sys_storage_engine, -#ifdef HAVE_REPLICATION - &sys_sync_binlog_period, - &sys_sync_replication, - &sys_sync_replication_slave_id, - &sys_sync_replication_timeout, -#endif - &sys_sync_frm, - &sys_table_cache_size, - &sys_table_lock_wait_timeout, - &sys_table_type, - &sys_thread_cache_size, - &sys_time_format, - &sys_timed_mutexes, - &sys_timestamp, - &sys_time_zone, - &sys_tmp_table_size, - &sys_trans_alloc_block_size, - &sys_trans_prealloc_size, - &sys_tx_isolation, - &sys_os, -#ifdef HAVE_INNOBASE_DB - &sys_innodb_fast_shutdown, - &sys_innodb_max_dirty_pages_pct, - &sys_innodb_max_purge_lag, - &sys_innodb_table_locks, - &sys_innodb_support_xa, - &sys_innodb_max_purge_lag, - &sys_innodb_autoextend_increment, - &sys_innodb_sync_spin_loops, - &sys_innodb_concurrency_tickets, - &sys_innodb_thread_sleep_delay, - &sys_innodb_thread_concurrency, - &sys_innodb_commit_concurrency, - &sys_innodb_flush_log_at_trx_commit, -#endif - &sys_trust_routine_creators, - &sys_trust_function_creators, - &sys_engine_condition_pushdown, -#ifdef HAVE_NDBCLUSTER_DB - &sys_ndb_autoincrement_prefetch_sz, - &sys_ndb_cache_check_time, - &sys_ndb_force_send, - &sys_ndb_use_exact_count, - &sys_ndb_use_transactions, -#endif - &sys_unique_checks, - &sys_updatable_views_with_limit, - &sys_warning_count -}; +static int show_slave_skip_errors(THD *thd, SHOW_VAR *var, char *buff) +{ + var->type=SHOW_CHAR; + var->value= buff; + if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask)) + { + var->value= const_cast<char *>("OFF"); + } + else if (bitmap_is_set_all(&slave_error_mask)) + { + var->value= const_cast<char *>("ALL"); + } + else + { + /* 10 is enough assuming errors are max 4 digits */ + int i; + var->value= buff; + for (i= 1; + i < MAX_SLAVE_ERROR && + (buff - var->value) < SHOW_VAR_FUNC_BUFF_SIZE; + i++) + { + if (bitmap_is_set(&slave_error_mask, i)) + { + buff= int10_to_str(i, buff, 10); + *buff++= ','; + } + } + if (var->value != buff) + buff--; // Remove last ',' + if (i < 
MAX_SLAVE_ERROR) + buff= strmov(buff, "..."); // Couldn't show all errors + *buff=0; + } + return 0; +} +#endif /* HAVE_REPLICATION */ /* Variables shown by SHOW variables in alphabetical order */ -struct show_var_st init_vars[]= { +SHOW_VAR init_vars[]= { {"auto_increment_increment", (char*) &sys_auto_increment_increment, SHOW_SYS}, {"auto_increment_offset", (char*) &sys_auto_increment_offset, SHOW_SYS}, {sys_automatic_sp_privileges.name,(char*) &sys_automatic_sp_privileges, SHOW_SYS}, {"back_log", (char*) &back_log, SHOW_LONG}, {"basedir", mysql_home, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONG}, + {"bdb_cache_parts", (char*) &berkeley_cache_parts, SHOW_LONG}, + {"bdb_cache_size", (char*) &berkeley_cache_size, SHOW_LONGLONG}, {"bdb_home", (char*) &berkeley_home, SHOW_CHAR_PTR}, {"bdb_log_buffer_size", (char*) &berkeley_log_buffer_size, SHOW_LONG}, {"bdb_logdir", (char*) &berkeley_logdir, SHOW_CHAR_PTR}, {"bdb_max_lock", (char*) &berkeley_max_lock, SHOW_LONG}, + {"bdb_region_size", (char*) &berkeley_region_size, SHOW_LONG}, {"bdb_shared_data", (char*) &berkeley_shared_data, SHOW_BOOL}, {"bdb_tmpdir", (char*) &berkeley_tmpdir, SHOW_CHAR_PTR}, -#endif {sys_binlog_cache_size.name,(char*) &sys_binlog_cache_size, SHOW_SYS}, + {sys_binlog_format.name, (char*) &sys_binlog_format, SHOW_SYS}, {sys_bulk_insert_buff_size.name,(char*) &sys_bulk_insert_buff_size,SHOW_SYS}, {sys_character_set_client.name,(char*) &sys_character_set_client, SHOW_SYS}, {sys_character_set_connection.name,(char*) &sys_character_set_connection,SHOW_SYS}, @@ -787,14 +728,18 @@ struct show_var_st init_vars[]= { {"datadir", mysql_real_data_home, SHOW_CHAR}, {sys_date_format.name, (char*) &sys_date_format, SHOW_SYS}, {sys_datetime_format.name, (char*) &sys_datetime_format, SHOW_SYS}, +#ifndef DBUG_OFF + {sys_dbug.name, (char*) &sys_dbug, SHOW_SYS}, +#endif {sys_default_week_format.name, (char*) &sys_default_week_format, SHOW_SYS}, {sys_delay_key_write.name, (char*) &sys_delay_key_write, SHOW_SYS}, {sys_delayed_insert_limit.name, (char*) &sys_delayed_insert_limit,SHOW_SYS}, {sys_delayed_insert_timeout.name, (char*) &sys_delayed_insert_timeout, SHOW_SYS}, {sys_delayed_queue_size.name,(char*) &sys_delayed_queue_size, SHOW_SYS}, {sys_div_precincrement.name,(char*) &sys_div_precincrement,SHOW_SYS}, - {sys_engine_condition_pushdown.name, + {sys_engine_condition_pushdown.name, (char*) &sys_engine_condition_pushdown, SHOW_SYS}, + {sys_event_executor.name, (char*) &sys_event_executor, SHOW_SYS}, {sys_expire_logs_days.name, (char*) &sys_expire_logs_days, SHOW_SYS}, {sys_flush.name, (char*) &sys_flush, SHOW_SYS}, {sys_flush_time.name, (char*) &sys_flush_time, SHOW_SYS}, @@ -804,27 +749,26 @@ struct show_var_st init_vars[]= { {"ft_query_expansion_limit",(char*) &ft_query_expansion_limit, SHOW_LONG}, {"ft_stopword_file", (char*) &ft_stopword_file, SHOW_CHAR_PTR}, {sys_group_concat_max_len.name, (char*) &sys_group_concat_max_len, SHOW_SYS}, - {"have_archive", (char*) &have_archive_db, SHOW_HAVE}, - {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, - {"have_blackhole_engine", (char*) &have_blackhole_db, SHOW_HAVE}, - {"have_compress", (char*) &have_compress, SHOW_HAVE}, - {"have_crypt", (char*) &have_crypt, SHOW_HAVE}, - {"have_csv", (char*) &have_csv_db, SHOW_HAVE}, - {"have_example_engine", (char*) &have_example_db, SHOW_HAVE}, - {"have_federated_engine", (char*) &have_federated_db, SHOW_HAVE}, - {"have_geometry", (char*) &have_geometry, SHOW_HAVE}, - {"have_innodb", (char*) 
&have_innodb, SHOW_HAVE}, - {"have_isam", (char*) &have_isam, SHOW_HAVE}, - {"have_ndbcluster", (char*) &have_ndbcluster, SHOW_HAVE}, - {"have_openssl", (char*) &have_openssl, SHOW_HAVE}, - {"have_query_cache", (char*) &have_query_cache, SHOW_HAVE}, - {"have_raid", (char*) &have_raid, SHOW_HAVE}, - {"have_rtree_keys", (char*) &have_rtree_keys, SHOW_HAVE}, - {"have_symlink", (char*) &have_symlink, SHOW_HAVE}, + {sys_have_archive_db.name, (char*) &have_archive_db, SHOW_HAVE}, + {sys_have_berkeley_db.name, (char*) &have_berkeley_db, SHOW_HAVE}, + {sys_have_blackhole_db.name,(char*) &have_blackhole_db, SHOW_HAVE}, + {sys_have_compress.name, (char*) &have_compress, SHOW_HAVE}, + {sys_have_crypt.name, (char*) &have_crypt, SHOW_HAVE}, + {sys_have_csv_db.name, (char*) &have_csv_db, SHOW_HAVE}, + {sys_have_example_db.name, (char*) &have_example_db, SHOW_HAVE}, + {sys_have_federated_db.name,(char*) &have_federated_db, SHOW_HAVE}, + {sys_have_geometry.name, (char*) &have_geometry, SHOW_HAVE}, + {sys_have_innodb.name, (char*) &have_innodb, SHOW_HAVE}, + {sys_have_ndbcluster.name, (char*) &have_ndbcluster, SHOW_HAVE}, + {sys_have_openssl.name, (char*) &have_openssl, SHOW_HAVE}, + {sys_have_partition_db.name,(char*) &have_partition_db, SHOW_HAVE}, + {sys_have_query_cache.name, (char*) &have_query_cache, SHOW_HAVE}, + {sys_have_row_based_replication.name, (char*) &have_row_based_replication, SHOW_HAVE}, + {sys_have_rtree_keys.name, (char*) &have_rtree_keys, SHOW_HAVE}, + {sys_have_symlink.name, (char*) &have_symlink, SHOW_HAVE}, {"init_connect", (char*) &sys_init_connect, SHOW_SYS}, {"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR}, {"init_slave", (char*) &sys_init_slave, SHOW_SYS}, -#ifdef HAVE_INNOBASE_DB {"innodb_additional_mem_pool_size", (char*) &innobase_additional_mem_pool_size, SHOW_LONG }, {sys_innodb_autoextend_increment.name, (char*) &sys_innodb_autoextend_increment, SHOW_SYS}, {"innodb_buffer_pool_awe_mem_mb", (char*) &innobase_buffer_pool_awe_mem_mb, SHOW_LONG }, @@ -838,7 +782,7 @@ struct show_var_st init_vars[]= { {sys_innodb_fast_shutdown.name,(char*) &sys_innodb_fast_shutdown, SHOW_SYS}, {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, - {sys_innodb_flush_log_at_trx_commit.name, (char*) &sys_innodb_flush_log_at_trx_commit, SHOW_SYS}, + {"innodb_flush_log_at_trx_commit", (char*) &innobase_flush_log_at_trx_commit, SHOW_INT}, {"innodb_flush_method", (char*) &innobase_unix_file_flush_method, SHOW_CHAR_PTR}, {"innodb_force_recovery", (char*) &innobase_force_recovery, SHOW_LONG }, {"innodb_lock_wait_timeout", (char*) &innobase_lock_wait_timeout, SHOW_LONG }, @@ -858,7 +802,6 @@ struct show_var_st init_vars[]= { {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, -#endif {sys_interactive_timeout.name,(char*) &sys_interactive_timeout, SHOW_SYS}, {sys_join_buffer_size.name, (char*) &sys_join_buffer_size, SHOW_SYS}, {sys_key_buffer_size.name, (char*) &sys_key_buffer_size, SHOW_SYS}, @@ -921,23 +864,33 @@ struct show_var_st init_vars[]= { {sys_myisam_sort_buffer_size.name, (char*) &sys_myisam_sort_buffer_size, SHOW_SYS}, {sys_myisam_stats_method.name, (char*) &sys_myisam_stats_method, SHOW_SYS}, + {sys_myisam_use_mmap.name, (char*) &sys_myisam_use_mmap, SHOW_SYS}, #ifdef __NT__ {"named_pipe", (char*) 
&opt_enable_named_pipe, SHOW_MY_BOOL}, #endif -#ifdef HAVE_NDBCLUSTER_DB {sys_ndb_autoincrement_prefetch_sz.name, (char*) &sys_ndb_autoincrement_prefetch_sz, SHOW_SYS}, + {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, + {sys_ndb_extra_logging.name,(char*) &sys_ndb_extra_logging, SHOW_SYS}, {sys_ndb_force_send.name, (char*) &sys_ndb_force_send, SHOW_SYS}, + {sys_ndb_index_stat_cache_entries.name, (char*) &sys_ndb_index_stat_cache_entries, SHOW_SYS}, + {sys_ndb_index_stat_enable.name, (char*) &sys_ndb_index_stat_enable, SHOW_SYS}, + {sys_ndb_index_stat_update_freq.name, (char*) &sys_ndb_index_stat_update_freq, SHOW_SYS}, +#ifdef HAVE_NDB_BINLOG + {sys_ndb_report_thresh_binlog_epoch_slip.name, + (char*) &sys_ndb_report_thresh_binlog_epoch_slip, SHOW_SYS}, + {sys_ndb_report_thresh_binlog_mem_usage.name, + (char*) &sys_ndb_report_thresh_binlog_mem_usage, SHOW_SYS}, +#endif {sys_ndb_use_exact_count.name,(char*) &sys_ndb_use_exact_count, SHOW_SYS}, {sys_ndb_use_transactions.name,(char*) &sys_ndb_use_transactions, SHOW_SYS}, - {sys_ndb_cache_check_time.name,(char*) &sys_ndb_cache_check_time, SHOW_SYS}, -#endif {sys_net_buffer_length.name,(char*) &sys_net_buffer_length, SHOW_SYS}, {sys_net_read_timeout.name, (char*) &sys_net_read_timeout, SHOW_SYS}, {sys_net_retry_count.name, (char*) &sys_net_retry_count, SHOW_SYS}, {sys_net_write_timeout.name,(char*) &sys_net_write_timeout, SHOW_SYS}, {sys_new_mode.name, (char*) &sys_new_mode, SHOW_SYS}, + {sys_old_alter_table.name, (char*) &sys_old_alter_table, SHOW_SYS}, {sys_old_passwords.name, (char*) &sys_old_passwords, SHOW_SYS}, {"open_files_limit", (char*) &open_files_limit, SHOW_LONG}, {sys_optimizer_prune_level.name, (char*) &sys_optimizer_prune_level, @@ -945,6 +898,7 @@ struct show_var_st init_vars[]= { {sys_optimizer_search_depth.name,(char*) &sys_optimizer_search_depth, SHOW_SYS}, {"pid_file", (char*) pidfile_name, SHOW_CHAR}, + {"plugin_dir", (char*) opt_plugin_dir, SHOW_CHAR}, {"port", (char*) &mysqld_port, SHOW_INT}, {sys_preload_buff_size.name, (char*) &sys_preload_buff_size, SHOW_SYS}, {"protocol_version", (char*) &protocol_version, SHOW_INT}, @@ -984,7 +938,7 @@ struct show_var_st init_vars[]= { (char*) &sys_slave_compressed_protocol, SHOW_SYS}, {"slave_load_tmpdir", (char*) &slave_load_tmpdir, SHOW_CHAR_PTR}, {sys_slave_net_timeout.name,(char*) &sys_slave_net_timeout, SHOW_SYS}, - {"slave_skip_errors", (char*) &slave_error_mask, SHOW_SLAVE_SKIP_ERRORS}, + {"slave_skip_errors", (char*) &show_slave_skip_errors, SHOW_FUNC}, {sys_slave_trans_retries.name,(char*) &sys_slave_trans_retries, SHOW_SYS}, #endif {sys_slow_launch_time.name, (char*) &sys_slow_launch_time, SHOW_SYS}, @@ -1008,8 +962,9 @@ struct show_var_st init_vars[]= { #ifdef HAVE_TZNAME {"system_time_zone", system_time_zone, SHOW_CHAR}, #endif - {"table_cache", (char*) &table_cache_size, SHOW_LONG}, + {"table_definition_cache", (char*) &table_def_size, SHOW_LONG}, {"table_lock_wait_timeout", (char*) &table_lock_wait_timeout, SHOW_LONG }, + {"table_open_cache", (char*) &table_cache_size, SHOW_LONG}, {sys_table_type.name, (char*) &sys_table_type, SHOW_SYS}, {sys_thread_cache_size.name,(char*) &sys_thread_cache_size, SHOW_SYS}, #ifdef HAVE_THR_SETCONCURRENCY @@ -1028,9 +983,6 @@ struct show_var_st init_vars[]= { {sys_updatable_views_with_limit.name, (char*) &sys_updatable_views_with_limit,SHOW_SYS}, {"version", server_version, SHOW_CHAR}, -#ifdef HAVE_BERKELEY_DB - {"version_bdb", (char*) DB_VERSION_STRING, SHOW_CHAR}, -#endif {"version_comment", (char*) 
MYSQL_COMPILATION_COMMENT, SHOW_CHAR}, {"version_compile_machine", (char*) MACHINE_TYPE, SHOW_CHAR}, {sys_os.name, (char*) &sys_os, SHOW_SYS}, @@ -1077,7 +1029,7 @@ bool update_sys_var_str(sys_var_str *var_str, rw_lock_t *var_mutex, uint new_length= (var ? var->value->str_value.length() : 0); if (!old_value) old_value= (char*) ""; - if (!(res= my_strdup_with_length((byte*)old_value, new_length, MYF(0)))) + if (!(res= my_strndup((byte*)old_value, new_length, MYF(0)))) return 1; /* Replace the old value in such a way that the any thread using @@ -1180,10 +1132,23 @@ static void fix_max_join_size(THD *thd, enum_var_type type) /* + Can't change the 'next' tx_isolation while we are already in + a transaction +*/ +static int check_tx_isolation(THD *thd, set_var *var) +{ + if (var->type == OPT_DEFAULT && (thd->server_status & SERVER_STATUS_IN_TRANS)) + { + my_error(ER_CANT_CHANGE_TX_ISOLATION, MYF(0)); + return 1; + } + return 0; +} + +/* If one doesn't use the SESSION modifier, the isolation level is only active for the next command */ - static void fix_tx_isolation(THD *thd, enum_var_type type) { if (type == OPT_SESSION) @@ -1281,6 +1246,54 @@ extern void fix_delay_key_write(THD *thd, enum_var_type type) } } + +bool sys_var_thd_binlog_format::is_readonly() const +{ + /* + Under certain circumstances, the variable is read-only (unchangeable): + */ + THD *thd= current_thd; + /* + If RBR and open temporary tables, their CREATE TABLE may not be in the + binlog, so we can't toggle to SBR in this connection. + The test below will also prevent SET GLOBAL, well it was not easy to test + if global or not here. + And this test will also prevent switching from RBR to RBR (a no-op which + should not happen too often). + */ + if ((thd->variables.binlog_format == BINLOG_FORMAT_ROW) && + thd->temporary_tables) + { + my_error(ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR, MYF(0)); + return 1; + } + /* + if in a stored function, it's too late to change mode + */ + if (thd->spcont && thd->prelocked_mode) + { + DBUG_ASSERT(thd->variables.binlog_format != BINLOG_FORMAT_ROW); + my_error(ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT, MYF(0)); + return 1; + } +#ifdef HAVE_NDB_BINLOG + /* + Cluster does not support changing the binlog format on the fly yet. 
+ */ + if (opt_bin_log && (have_ndbcluster == SHOW_OPTION_YES)) + { + my_error(ER_NDB_CANT_SWITCH_BINLOG_FORMAT, MYF(0)); + return 1; + } +#endif + return sys_var_thd_enum::is_readonly(); +} + +void fix_binlog_format_after_update(THD *thd, enum_var_type type) +{ + thd->reset_current_stmt_binlog_row_based(); +} + static void fix_max_binlog_size(THD *thd, enum_var_type type) { DBUG_ENTER("fix_max_binlog_size"); @@ -2762,7 +2775,7 @@ static bool set_log_update(THD *thd, set_var *var) See sql/mysqld.cc/, comments in function init_server_components() for an explaination of the different warnings we send below */ - + if (opt_sql_bin_update) { ((sys_var_thd_bit*) var->var)->bit_flag|= (OPTION_BIN_LOG | @@ -2820,12 +2833,6 @@ static byte *get_error_count(THD *thd) } -static byte *get_have_innodb(THD *thd) -{ - return (byte*) show_comp_option_name[have_innodb]; -} - - /**************************************************************************** Main handling of variables: - Initialisation @@ -2884,17 +2891,15 @@ static byte *get_sys_var_length(const sys_var *var, uint *length, void set_var_init() { - hash_init(&system_variable_hash, system_charset_info, - array_elements(sys_variables),0,0, - (hash_get_key) get_sys_var_length,0,0); - sys_var **var, **end; - for (var= sys_variables, end= sys_variables+array_elements(sys_variables) ; - var < end; - var++) + sys_var *var; + + hash_init(&system_variable_hash, system_charset_info, sys_var::sys_vars, 0, + 0, (hash_get_key) get_sys_var_length, 0, 0); + for (var= sys_var::first; var; var= var->next) { - (*var)->name_length= strlen((*var)->name); - (*var)->option_limits= find_option(my_long_options, (*var)->name); - my_hash_insert(&system_variable_hash, (byte*) *var); + var->name_length= strlen(var->name); + var->option_limits= find_option(my_long_options, var->name); + my_hash_insert(&system_variable_hash, (byte*) var); } /* Special cases @@ -3201,11 +3206,12 @@ bool sys_var_thd_storage_engine::check(THD *thd, set_var *var) if (var->value->result_type() == STRING_RESULT) { - enum db_type db_type; + LEX_STRING name; + handlerton *db_type; if (!(res=var->value->val_str(&str)) || - !(var->save_result.ulong_value= - (ulong) (db_type= ha_resolve_by_name(res->ptr(), res->length()))) || - ha_checktype(thd, db_type, 1, 0) != db_type) + !(name.str= (char *)res->ptr()) || !(name.length= res->length()) || + !(var->save_result.hton= db_type= ha_resolve_by_name(thd, &name)) || + ha_checktype(thd, ha_legacy_type(db_type), 1, 0) != db_type) { value= res ? res->c_ptr() : "NULL"; goto err; @@ -3223,38 +3229,34 @@ err: byte *sys_var_thd_storage_engine::value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) { - ulong val; - val= ((type == OPT_GLOBAL) ? global_system_variables.*offset : - thd->variables.*offset); - const char *table_type= ha_get_storage_engine((enum db_type)val); - return (byte *) table_type; + handlerton *val; + val= (type == OPT_GLOBAL) ? 
global_system_variables.*offset : + thd->variables.*offset; + return (byte *) val->name; } void sys_var_thd_storage_engine::set_default(THD *thd, enum_var_type type) { if (type == OPT_GLOBAL) - global_system_variables.*offset= (ulong) DB_TYPE_MYISAM; + global_system_variables.*offset= &myisam_hton; else - thd->variables.*offset= (ulong) (global_system_variables.*offset); + thd->variables.*offset= global_system_variables.*offset; } bool sys_var_thd_storage_engine::update(THD *thd, set_var *var) { - if (var->type == OPT_GLOBAL) - global_system_variables.*offset= var->save_result.ulong_value; - else - thd->variables.*offset= var->save_result.ulong_value; + handlerton **value= &(global_system_variables.*offset); + if (var->type != OPT_GLOBAL) + value= &(thd->variables.*offset); + *value= var->save_result.hton; return 0; } void sys_var_thd_table_type::warn_deprecated(THD *thd) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_DEPRECATED_SYNTAX, - ER(ER_WARN_DEPRECATED_SYNTAX), "table_type", - "storage_engine"); + WARN_DEPRECATED(thd, "5.2", "table_type", "'storage_engine'"); } void sys_var_thd_table_type::set_default(THD *thd, enum_var_type type) @@ -3511,10 +3513,8 @@ bool process_key_caches(int (* func) (const char *name, KEY_CACHE *)) void sys_var_trust_routine_creators::warn_deprecated(THD *thd) { - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_DEPRECATED_SYNTAX, - ER(ER_WARN_DEPRECATED_SYNTAX), "log_bin_trust_routine_creators", - "log_bin_trust_function_creators"); + WARN_DEPRECATED(thd, "5.2", "log_bin_trust_routine_creators", + "'log_bin_trust_function_creators'"); } void sys_var_trust_routine_creators::set_default(THD *thd, enum_var_type type) @@ -3529,6 +3529,34 @@ bool sys_var_trust_routine_creators::update(THD *thd, set_var *var) return sys_var_bool_ptr::update(thd, var); } +/* even session variable here requires SUPER, because of -#o,file */ +bool sys_var_thd_dbug::check(THD *thd, set_var *var) +{ + return check_global_access(thd, SUPER_ACL); +} + +bool sys_var_thd_dbug::update(THD *thd, set_var *var) +{ + if (var->type == OPT_GLOBAL) + DBUG_SET_INITIAL(var ? var->value->str_value.c_ptr() : ""); + else + { + DBUG_POP(); + DBUG_PUSH(var ? 
var->value->str_value.c_ptr() : ""); + } + return 0; +} + +byte *sys_var_thd_dbug::value_ptr(THD *thd, enum_var_type type, LEX_STRING *b) +{ + char buf[256]; + if (type == OPT_GLOBAL) + DBUG_EXPLAIN_INITIAL(buf, sizeof(buf)); + else + DBUG_EXPLAIN(buf, sizeof(buf)); + return (byte*) thd->strdup(buf); +} + /**************************************************************************** Used templates ****************************************************************************/ diff --git a/sql/set_var.h b/sql/set_var.h index 046281ec7c5..f62d6ce8d2a 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -39,21 +39,30 @@ typedef byte *(*sys_value_ptr_func)(THD *thd); class sys_var { public: + static sys_var *first; + static uint sys_vars; + sys_var *next; struct my_option *option_limits; /* Updated by by set_var_init() */ uint name_length; /* Updated by by set_var_init() */ const char *name; - + sys_after_update_func after_update; bool no_support_one_shot; sys_var(const char *name_arg) :name(name_arg), after_update(0) , no_support_one_shot(1) - {} + { add_sys_var(); } sys_var(const char *name_arg,sys_after_update_func func) :name(name_arg), after_update(func) , no_support_one_shot(1) - {} + { add_sys_var(); } virtual ~sys_var() {} + void add_sys_var() + { + next= first; + first= this; + sys_vars++; + } virtual bool check(THD *thd, set_var *var); bool check_enum(THD *thd, set_var *var, TYPELIB *enum_names); bool check_set(THD *thd, set_var *var, TYPELIB *enum_names); @@ -325,19 +334,31 @@ class sys_var_thd_enum :public sys_var_thd protected: ulong SV::*offset; TYPELIB *enum_names; + sys_check_func check_func; public: sys_var_thd_enum(const char *name_arg, ulong SV::*offset_arg, TYPELIB *typelib) - :sys_var_thd(name_arg), offset(offset_arg), enum_names(typelib) + :sys_var_thd(name_arg), offset(offset_arg), enum_names(typelib), + check_func(0) {} sys_var_thd_enum(const char *name_arg, ulong SV::*offset_arg, TYPELIB *typelib, sys_after_update_func func) - :sys_var_thd(name_arg,func), offset(offset_arg), enum_names(typelib) + :sys_var_thd(name_arg,func), offset(offset_arg), enum_names(typelib), + check_func(0) + {} + sys_var_thd_enum(const char *name_arg, ulong SV::*offset_arg, + TYPELIB *typelib, sys_after_update_func func, + sys_check_func check) + :sys_var_thd(name_arg,func), offset(offset_arg), enum_names(typelib), + check_func(check) {} bool check(THD *thd, set_var *var) { - return check_enum(thd, var, enum_names); + int ret= 0; + if (check_func) + ret= (*check_func)(thd, var); + return ret ? 
ret : check_enum(thd, var, enum_names); } bool update(THD *thd, set_var *var); void set_default(THD *thd, enum_var_type type); @@ -370,9 +391,9 @@ public: class sys_var_thd_storage_engine :public sys_var_thd { protected: - ulong SV::*offset; + handlerton *SV::*offset; public: - sys_var_thd_storage_engine(const char *name_arg, ulong SV::*offset_arg) + sys_var_thd_storage_engine(const char *name_arg, handlerton *SV::*offset_arg) :sys_var_thd(name_arg), offset(offset_arg) {} bool check(THD *thd, set_var *var); @@ -389,7 +410,7 @@ SHOW_TYPE type() { return SHOW_CHAR; } class sys_var_thd_table_type :public sys_var_thd_storage_engine { public: - sys_var_thd_table_type(const char *name_arg, ulong SV::*offset_arg) + sys_var_thd_table_type(const char *name_arg, handlerton *SV::*offset_arg) :sys_var_thd_storage_engine(name_arg, offset_arg) {} void warn_deprecated(THD *thd); @@ -404,7 +425,7 @@ class sys_var_thd_bit :public sys_var_thd public: ulong bit_flag; bool reverse; - sys_var_thd_bit(const char *name_arg, + sys_var_thd_bit(const char *name_arg, sys_check_func c_func, sys_update_func u_func, ulong bit, bool reverse_arg=0) :sys_var_thd(name_arg), check_func(c_func), update_func(u_func), @@ -418,6 +439,19 @@ public: byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); }; +class sys_var_thd_dbug :public sys_var_thd +{ +public: + sys_var_thd_dbug(const char *name_arg) :sys_var_thd(name_arg) {} + bool check_update_type(Item_result type) { return type != STRING_RESULT; } + bool check(THD *thd, set_var *var); + SHOW_TYPE type() { return SHOW_CHAR; } + bool update(THD *thd, set_var *var); + void set_default(THD *thd, enum_var_type type) { DBUG_POP(); } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *b); +}; + + /* some variables that require special handling */ @@ -710,6 +744,30 @@ public: bool is_readonly() const { return 1; } }; + +class sys_var_have_variable: public sys_var +{ + SHOW_COMP_OPTION *have_variable; + +public: + sys_var_have_variable(const char *variable_name, + SHOW_COMP_OPTION *have_variable_arg): + sys_var(variable_name), + have_variable(have_variable_arg) + { } + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) + { + return (byte*) show_comp_option_name[*have_variable]; + } + bool update(THD *thd, set_var *var) { return 1; } + bool check_default(enum_var_type type) { return 1; } + bool check_type(enum_var_type type) { return type != OPT_GLOBAL; } + bool check_update_type(Item_result type) { return 1; } + SHOW_TYPE type() { return SHOW_CHAR; } + bool is_readonly() const { return 1; } +}; + + class sys_var_thd_time_zone :public sys_var_thd { public: @@ -758,6 +816,29 @@ public: bool update(THD *thd, set_var *var); }; + +class sys_var_event_executor :public sys_var_bool_ptr +{ + /* We need a derived class only to have a warn_deprecated() */ +public: + sys_var_event_executor(const char *name_arg, my_bool *value_arg) : + sys_var_bool_ptr(name_arg, value_arg) {}; + bool update(THD *thd, set_var *var); +}; + +extern void fix_binlog_format_after_update(THD *thd, enum_var_type type); + +class sys_var_thd_binlog_format :public sys_var_thd_enum +{ +public: + sys_var_thd_binlog_format(const char *name_arg, ulong SV::*offset_arg) + :sys_var_thd_enum(name_arg, offset_arg, + &binlog_format_typelib, + fix_binlog_format_after_update) + {}; + bool is_readonly() const; +}; + /**************************************************************************** Classes for parsing of the SET command 
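Because every sys_var constructor now chains itself onto a static list via add_sys_var(), set_var_init() above can walk sys_var::first instead of a hand-maintained array. The following toy program, detached from the server types (Var, v_a, v_b are invented names), is a sketch of that intrusive self-registration pattern, assuming only standard C++:

#include <cstdio>

/* Toy stand-in for sys_var's intrusive registration list. */
struct Var
{
  static Var *first;
  static unsigned count;
  Var *next;
  const char *name;
  Var(const char *n) : name(n)
  {
    next= first;                  /* what add_sys_var() does */
    first= this;
    count++;
  }
};
Var *Var::first= 0;
unsigned Var::count= 0;

static Var v_a("alpha");          /* registered at static-init time */
static Var v_b("beta");

int main()
{
  for (Var *v= Var::first; v; v= v->next)  /* like set_var_init() */
    printf("%s\n", v->name);
  return 0;
}

The pattern only works because the static sys_var objects are constructed before set_var_init() runs; the list order is reverse construction order, which is harmless here since the names end up in a hash anyway.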
****************************************************************************/ @@ -788,6 +869,7 @@ public: CHARSET_INFO *charset; ulong ulong_value; ulonglong ulonglong_value; + handlerton *hton; DATE_TIME_FORMAT *date_time_format; Time_zone *time_zone; } save_result; @@ -881,7 +963,7 @@ public: uint name_length_arg, gptr data_arg) :name_length(name_length_arg), data(data_arg) { - name= my_strdup_with_length((byte*) name_arg, name_length, MYF(MY_WME)); + name= my_strndup((byte*) name_arg, name_length, MYF(MY_WME)); links->push_back(this); } inline bool cmp(const char *name_cmp, uint length) @@ -900,6 +982,7 @@ public: /* updated in sql_acl.cc */ +extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; diff --git a/sql/share/charsets/Index.xml b/sql/share/charsets/Index.xml index 32fd1618a8b..6db14f22264 100644 --- a/sql/share/charsets/Index.xml +++ b/sql/share/charsets/Index.xml @@ -1,6 +1,6 @@ <?xml version='1.0' encoding="utf-8"?> -<charsets max-id="98"> +<charsets max-id="99"> <copyright> Copyright (C) 2003 MySQL AB @@ -370,6 +370,9 @@ To make maintaining easier please: <collation name="cp1250_croatian_ci" id="44"> <order>Croatian</order> </collation> + <collation name="cp1250_polish_ci" id="99"> + <order>Polish</order> + </collation> <collation name="cp1250_czech_cs" id="34" order="Czech"> <flag>compiled</flag> </collation> diff --git a/sql/share/charsets/cp1250.xml b/sql/share/charsets/cp1250.xml index 1e62e64ad5a..b83d0faeca8 100644 --- a/sql/share/charsets/cp1250.xml +++ b/sql/share/charsets/cp1250.xml @@ -153,6 +153,27 @@ BE BF C0 54 C1 C2 C3 C4 C5 41 5F C6 54 C7 54 6B </map> </collation> +<collation name="cp1250_polish_ci"> +<map> +00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F +10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F +20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F +30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F +40 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 90 91 92 93 94 +95 41 43 44 48 4B 4D 4E 4F 50 52 53 54 56 57 59 +5B 5C 5D 5F 62 64 66 67 68 69 6B 96 97 98 99 9A +9B 9C 9E 9F A0 A1 A2 A3 A4 A5 5F A6 60 62 6B 6C +A7 A8 A9 AA AB AC AD AE AF B0 5F B1 60 62 6B 6C +B2 B3 B4 55 B5 42 B6 B7 B8 B9 5F BA BB BC BD 6D +BE BF C0 55 C1 C2 C3 C4 C5 42 5F C6 54 C7 54 6D +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C8 5D 64 64 64 64 69 62 5F +5D 41 41 41 41 54 47 44 44 4B 4C 4B 4B 50 50 48 +48 58 57 5A 59 59 59 C9 5D 64 64 64 64 69 62 FF +</map> +</collation> + <collation name="cp1250_czech_ci"/> <collation name="cp1250_bin" flag="binary"/> diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index a860796f02d..d163da30e95 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -1479,30 +1479,30 @@ ER_DUP_KEYNAME 42000 S1009 swe "Nyckelnamn '%-.64s' finns flera gånger" ukr "äÕÂÌÀÀÞÅ ¦Í'Ñ ËÌÀÞÁ '%-.64s'" ER_DUP_ENTRY 23000 S1009 - cze "Zvojen-Bý klíè '%-.64s' (èíslo klíèe %d)" - dan "Ens værdier '%-.64s' for indeks %d" - nla "Dubbele ingang '%-.64s' voor zoeksleutel %d" - eng "Duplicate entry '%-.64s' for key %d" - jps "'%-.64s' ‚Í key %d ‚É‚¨‚¢‚Äd•¡‚µ‚Ä‚¢‚Ü‚·", - est "Kattuv väärtus '%-.64s' võtmele %d" - fre "Duplicata du champ '%-.64s' pour la clef %d" - ger "Doppelter Eintrag '%-.64s' für Schlüssel %d" - greek "ÄéðëÞ åããñáöÞ '%-.64s' ãéá ôï êëåéäß %d" - hun "Duplikalt bejegyzes '%-.64s' a %d kulcs szerint." 
- ita "Valore duplicato '%-.64s' per la chiave %d" - jpn "'%-.64s' ¤Ï key %d ¤Ë¤ª¤¤¤Æ½ÅÊ£¤·¤Æ¤¤¤Þ¤¹" - kor "Áߺ¹µÈ ÀÔ·Â °ª '%-.64s': key %d" - nor "Like verdier '%-.64s' for nøkkel %d" - norwegian-ny "Like verdiar '%-.64s' for nykkel %d" - pol "Powtórzone wyst?pienie '%-.64s' dla klucza %d" - por "Entrada '%-.64s' duplicada para a chave %d" - rum "Cimpul '%-.64s' e duplicat pentru cheia %d" - rus "äÕÂÌÉÒÕÀÝÁÑÓÑ ÚÁÐÉÓØ '%-.64s' ÐÏ ËÌÀÞÕ %d" - serbian "Dupliran unos '%-.64s' za kljuè '%d'" - slo "Opakovaný kµúè '%-.64s' (èíslo kµúèa %d)" - spa "Entrada duplicada '%-.64s' para la clave %d" - swe "Dubbel nyckel '%-.64s' för nyckel %d" - ukr "äÕÂÌÀÀÞÉÊ ÚÁÐÉÓ '%-.64s' ÄÌÑ ËÌÀÞÁ %d" + cze "Zvojen-Bý klíè '%-.64s' (èíslo klíèe '%-.64s')" + dan "Ens værdier '%-.64s' for indeks '%-.64s'" + nla "Dubbele ingang '%-.64s' voor zoeksleutel '%-.64s'" + eng "Duplicate entry '%-.64s' for key '%-.64s'" + jps "'%-.64s' ‚Í key '%-.64s' ‚É‚¨‚¢‚Äd•¡‚µ‚Ä‚¢‚Ü‚·", + est "Kattuv väärtus '%-.64s' võtmele '%-.64s'" + fre "Duplicata du champ '%-.64s' pour la clef '%-.64s'" + ger "Doppelter Eintrag '%-.64s' für Schlüssel '%-.64s'" + greek "ÄéðëÞ åããñáöÞ '%-.64s' ãéá ôï êëåéäß '%-.64s'" + hun "Duplikalt bejegyzes '%-.64s' a '%-.64s' kulcs szerint." + ita "Valore duplicato '%-.64s' per la chiave '%-.64s'" + jpn "'%-.64s' ¤Ï key '%-.64s' ¤Ë¤ª¤¤¤Æ½ÅÊ£¤·¤Æ¤¤¤Þ¤¹" + kor "Áߺ¹µÈ ÀÔ·Â °ª '%-.64s': key '%-.64s'" + nor "Like verdier '%-.64s' for nøkkel '%-.64s'" + norwegian-ny "Like verdiar '%-.64s' for nykkel '%-.64s'" + pol "Powtórzone wyst?pienie '%-.64s' dla klucza '%-.64s'" + por "Entrada '%-.64s' duplicada para a chave '%-.64s'" + rum "Cimpul '%-.64s' e duplicat pentru cheia '%-.64s'" + rus "äÕÂÌÉÒÕÀÝÁÑÓÑ ÚÁÐÉÓØ '%-.64s' ÐÏ ËÌÀÞÕ '%-.64s'" + serbian "Dupliran unos '%-.64s' za kljuè '%-.64s'" + slo "Opakovaný kµúè '%-.64s' (èíslo kµúèa '%-.64s')" + spa "Entrada duplicada '%-.64s' para la clave '%-.64s'" + swe "Dubbel nyckel '%-.64s' för nyckel '%-.64s'" + ukr "äÕÂÌÀÀÞÉÊ ÚÁÐÉÓ '%-.64s' ÄÌÑ ËÌÀÞÁ '%-.64s'" ER_WRONG_FIELD_SPEC 42000 S1009 cze "Chybn-Bá specifikace sloupce '%-.64s'" dan "Forkert kolonnespecifikaton for felt '%-.64s'" @@ -2981,11 +2981,11 @@ ER_CANT_OPEN_LIBRARY cze "Nemohu otev-Bøít sdílenou knihovnu '%-.64s' (errno: %d %s)" dan "Kan ikke åbne delt bibliotek '%-.64s' (errno: %d %s)" nla "Kan shared library '%-.64s' niet openen (Errcode: %d %s)" - eng "Can't open shared library '%-.64s' (errno: %d %-.64s)" + eng "Can't open shared library '%-.64s' (errno: %d %s)" jps "shared library '%-.64s' ‚ðŠJ‚Ž–‚ª‚Å‚«‚Ü‚¹‚ñ (errno: %d %s)", - est "Ei suuda avada jagatud teeki '%-.64s' (veakood: %d %-.64s)" + est "Ei suuda avada jagatud teeki '%-.64s' (veakood: %d %s)" fre "Impossible d'ouvrir la bibliothèque partagée '%-.64s' (errno: %d %s)" - ger "Kann Shared Library '%-.64s' nicht öffnen (Fehler: %d %-.64s)" + ger "Kann Shared Library '%-.64s' nicht öffnen (Fehler: %d %s)" greek "Äåí åßíáé äõíáôÞ ç áíÜãíùóç ôçò shared library '%-.64s' (êùäéêüò ëÜèïõò: %d %s)" hun "A(z) '%-.64s' megosztott konyvtar nem hasznalhato (hibakod: %d %s)" ita "Impossibile aprire la libreria condivisa '%-.64s' (errno: %d %s)" @@ -2995,18 +2995,18 @@ ER_CANT_OPEN_LIBRARY norwegian-ny "Can't open shared library '%-.64s' (errno: %d %s)" pol "Can't open shared library '%-.64s' (errno: %d %s)" por "Não pode abrir biblioteca compartilhada '%-.64s' (erro no. 
'%d' - '%-.64s')" - rum "Nu pot deschide libraria shared '%-.64s' (Eroare: %d %-.64s)" - rus "îÅ×ÏÚÍÏÖÎÏ ÏÔËÒÙÔØ ÄÉÎÁÍÉÞÅÓËÕÀ ÂÉÂÌÉÏÔÅËÕ '%-.64s' (ÏÛÉÂËÁ: %d %-.64s)" - serbian "Ne mogu da otvorim share-ovanu biblioteku '%-.64s' (errno: %d %-.64s)" + rum "Nu pot deschide libraria shared '%-.64s' (Eroare: %d %s)" + rus "îÅ×ÏÚÍÏÖÎÏ ÏÔËÒÙÔØ ÄÉÎÁÍÉÞÅÓËÕÀ ÂÉÂÌÉÏÔÅËÕ '%-.64s' (ÏÛÉÂËÁ: %d %s)" + serbian "Ne mogu da otvorim share-ovanu biblioteku '%-.64s' (errno: %d %s)" slo "Nemô¾em otvori» zdieµanú kni¾nicu '%-.64s' (chybový kód: %d %s)" spa "No puedo abrir libraria conjugada '%-.64s' (errno: %d %s)" swe "Kan inte öppna det dynamiska biblioteket '%-.64s' (Felkod: %d %s)" - ukr "îÅ ÍÏÖÕ ×¦ÄËÒÉÔÉ ÒÏÚĦÌÀ×ÁÎÕ Â¦Â̦ÏÔÅËÕ '%-.64s' (ÐÏÍÉÌËÁ: %d %-.64s)" + ukr "îÅ ÍÏÖÕ ×¦ÄËÒÉÔÉ ÒÏÚĦÌÀ×ÁÎÕ Â¦Â̦ÏÔÅËÕ '%-.64s' (ÐÏÍÉÌËÁ: %d %s)" ER_CANT_FIND_DL_ENTRY cze "Nemohu naj-Bít funkci '%-.64s' v knihovnì" dan "Kan ikke finde funktionen '%-.64s' i bibliotek" nla "Kan functie '%-.64s' niet in library vinden" - eng "Can't find function '%-.64s' in library" + eng "Can't find symbol '%-.64s' in library" jps "function '%-.64s' ‚ðƒ‰ƒCƒuƒ‰ƒŠ[’†‚ÉŒ©•t‚¯‚鎖‚ª‚Å‚«‚Ü‚¹‚ñ", est "Ei leia funktsiooni '%-.64s' antud teegis" fre "Impossible de trouver la fonction '%-.64s' dans la bibliothèque" @@ -3018,7 +3018,7 @@ ER_CANT_FIND_DL_ENTRY kor "¶óÀ̹ö·¯¸®¿¡¼ '%-.64s' ÇÔ¼ö¸¦ ãÀ» ¼ö ¾ø½À´Ï´Ù." por "Não pode encontrar a função '%-.64s' na biblioteca" rum "Nu pot gasi functia '%-.64s' in libraria" - rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÆÕÎËÃÉÀ '%-.64s' × ÂÉÂÌÉÏÔÅËÅ" + rus "îÅ×ÏÚÍÏÖÎÏ ÏÔÙÓËÁÔØ ÓÉÍ×ÏÌ '%-.64s' × ÂÉÂÌÉÏÔÅËÅ" serbian "Ne mogu da pronadjem funkciju '%-.64s' u biblioteci" slo "Nemô¾em nájs» funkciu '%-.64s' v kni¾nici" spa "No puedo encontrar función '%-.64s' en libraria" @@ -4898,10 +4898,7 @@ ER_WARN_NULL_TO_NOTNULL 22004 por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %ld" spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %ld" ER_WARN_DATA_OUT_OF_RANGE 22003 - eng "Out of range value adjusted for column '%s' at row %ld" - ger "Daten abgeschnitten, außerhalb des Wertebereichs für Feld '%s' in Zeile %ld" - por "Dado truncado, fora de alcance para coluna '%s' na linha %ld" - spa "Datos truncados, fuera de gama para columna '%s' en la línea %ld" + eng "Out of range value for column '%s' at row %ld" WARN_DATA_TRUNCATED 01000 eng "Data truncated for column '%s' at row %ld" ger "Daten abgeschnitten für Feld '%s' in Zeile %ld" @@ -5021,7 +5018,7 @@ ER_UNKNOWN_STORAGE_ENGINE 42000 ger "Unbekannte Speicher-Engine '%s'" por "Motor de tabela desconhecido '%s'" spa "Desconocido motor de tabla '%s'" -ER_WARN_DEPRECATED_SYNTAX +ER_UNUSED_1 eng "'%s' is deprecated; use '%s' instead" ger "'%s' ist veraltet. Bitte benutzen Sie '%s'" por "'%s' é desatualizado. 
Use '%s' em seu lugar" @@ -5193,8 +5190,8 @@ ER_SP_CANT_ALTER eng "Failed to ALTER %s %s" ger "ALTER %s %s fehlgeschlagen" ER_SP_SUBSELECT_NYI 0A000 - eng "Subselect value not supported" - ger "Subselect-Wert wird nicht unterstützt" + eng "Subquery value not supported" + ger "Subquery-Wert wird nicht unterstützt" ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG 0A000 eng "%s is not allowed in stored function or trigger" ger "%s ist in gespeicherten Funktionen und in Triggern nicht erlaubt" @@ -5592,7 +5589,7 @@ ER_NO_REFERENCED_ROW_2 23000 eng "Cannot add or update a child row: a foreign key constraint fails (%.192s)" ger "Kann Kind-Zeile nicht hinzufügen oder aktualisieren: eine Fremdschlüsselbedingung schlägt fehl (%.192s)" ER_SP_BAD_VAR_SHADOW 42000 - eng "Variable '%-.64s' must be quoted with `...`, or renamed" + eng "Variable '%-.64s' must be quoted with `...`, or renamed" ger "Variable '%-.64s' muss mit `...` geschützt oder aber umbenannt werden" ER_TRG_NO_DEFINER eng "No definer attribute for trigger '%-.64s'.'%-.64s'. The trigger will be activated under the authorization of the invoker, which may have insufficient privileges. Please recreate the trigger." @@ -5604,10 +5601,223 @@ ER_SP_RECURSION_LIMIT eng "Recursive limit %d (as set by the max_sp_recursion_depth variable) was exceeded for routine %.64s" ger "Rekursionsgrenze %d (durch Variable max_sp_recursion_depth gegeben) wurde für Routine %.64s überschritten" ER_SP_PROC_TABLE_CORRUPT - eng "Failed to load routine %s. The table mysql.proc is missing, corrupt, or contains bad data (internal code %d)" + eng "Failed to load routine %-.64s. The table mysql.proc is missing, corrupt, or contains bad data (internal code %d)" +ER_PARTITION_REQUIRES_VALUES_ERROR + eng "%-.64s PARTITIONING requires definition of VALUES %-.64s for each partition" + swe "%-.64s PARTITIONering kräver definition av VALUES %-.64s för varje partition" +ER_PARTITION_WRONG_VALUES_ERROR + eng "Only %-.64s PARTITIONING can use VALUES %-.64s in partition definition" + swe "Endast %-.64s partitionering kan använda VALUES %-.64s i definition av partitionen" +ER_PARTITION_MAXVALUE_ERROR + eng "MAXVALUE can only be used in last partition definition" + swe "MAXVALUE kan bara användas i definitionen av den sista partitionen" +ER_PARTITION_SUBPARTITION_ERROR + eng "Subpartitions can only be hash partitions and by key" + swe "Subpartitioner kan bara vara hash och key partitioner" +ER_PARTITION_WRONG_NO_PART_ERROR + eng "Wrong number of partitions defined, mismatch with previous setting" + swe "Antal partitioner definierade och antal partitioner är inte lika" +ER_PARTITION_WRONG_NO_SUBPART_ERROR + eng "Wrong number of subpartitions defined, mismatch with previous setting" + swe "Antal subpartitioner definierade och antal subpartitioner är inte lika" +ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR + eng "Constant/Random expression in (sub)partitioning function is not allowed" + swe "Konstanta uttryck eller slumpmässiga uttryck är inte tillåtna (sub)partitioneringsfunktioner" +ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR + eng "Expression in RANGE/LIST VALUES must be constant" + swe "Uttryck i RANGE/LIST VALUES måste vara ett konstant uttryck" +ER_FIELD_NOT_FOUND_PART_ERROR + eng "Field in list of fields for partition function not found in table" + swe "Fält i listan av fält för partitionering med key inte funnen i tabellen" +ER_LIST_OF_FIELDS_ONLY_IN_HASH_ERROR + eng "List of fields is only allowed in KEY partitions" + swe "En lista av fält är endast tillåtet för KEY partitioner" 
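The new ER_PARTITION_REQUIRES_VALUES_ERROR message above encodes the rule that every partition of a RANGE or LIST scheme must carry its own VALUES clause. A minimal standalone sketch of that check follows; PartDef and check_values_defined are simplified stand-ins invented for illustration, not the server's partition_element/partition_info types:

#include <cstdio>
#include <vector>

/* Simplified stand-in for a partition definition; illustrative only. */
struct PartDef
{
  const char *name;
  bool has_values;    /* true if VALUES LESS THAN / VALUES IN was given */
};

/*
  Returns true (and reports) if some RANGE partition lacks a VALUES
  clause, the condition behind ER_PARTITION_REQUIRES_VALUES_ERROR.
*/
static bool check_values_defined(const std::vector<PartDef> &parts)
{
  for (size_t i= 0; i < parts.size(); i++)
  {
    if (!parts[i].has_values)
    {
      fprintf(stderr, "RANGE PARTITIONING requires definition of "
                      "VALUES LESS THAN for partition '%s'\n",
              parts[i].name);
      return true;
    }
  }
  return false;
}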
+ER_INCONSISTENT_PARTITION_INFO_ERROR + eng "The partition info in the frm file is not consistent with what can be written into the frm file" + swe "Partitioneringsinformationen i frm-filen är inte konsistent med vad som kan skrivas i frm-filen" +ER_PARTITION_FUNC_NOT_ALLOWED_ERROR + eng "The %-.64s function returns the wrong type" + swe "%-.64s-funktionen returnerar felaktig typ" +ER_PARTITIONS_MUST_BE_DEFINED_ERROR + eng "For %-.64s partitions each partition must be defined" + swe "För %-.64s partitionering så måste varje partition definieras" +ER_RANGE_NOT_INCREASING_ERROR + eng "VALUES LESS THAN value must be strictly increasing for each partition" + swe "Värden i VALUES LESS THAN måste vara strikt växande för varje partition" +ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR + eng "VALUES value must be of same type as partition function" + swe "Värden i VALUES måste vara av samma typ som partitioneringsfunktionen" +ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR + eng "Multiple definition of same constant in list partitioning" + swe "Multipel definition av samma konstant i list partitionering" +ER_PARTITION_ENTRY_ERROR + eng "Partitioning can not be used stand-alone in query" + swe "Partitioneringssyntax kan inte användas på egen hand i en SQL-fråga" +ER_MIX_HANDLER_ERROR + eng "The mix of handlers in the partitions is not allowed in this version of MySQL" + swe "Denna mix av lagringsmotorer är inte tillåten i denna version av MySQL" +ER_PARTITION_NOT_DEFINED_ERROR + eng "For the partitioned engine it is necessary to define all %-.64s" + swe "För partitioneringsmotorn så är det nödvändigt att definiera alla %-.64s" +ER_TOO_MANY_PARTITIONS_ERROR + eng "Too many partitions (including subpartitions) were defined" + swe "För många partitioner (inkluderande subpartitioner) definierades" +ER_SUBPARTITION_ERROR + eng "It is only possible to mix RANGE/LIST partitioning with HASH/KEY partitioning for subpartitioning" + swe "Det är endast möjligt att blanda RANGE/LIST partitionering med HASH/KEY partitionering för subpartitionering" +ER_CANT_CREATE_HANDLER_FILE + eng "Failed to create specific handler file" + swe "Misslyckades med att skapa specifik fil i lagringsmotor" +ER_BLOB_FIELD_IN_PART_FUNC_ERROR + eng "A BLOB field is not allowed in partition function" + swe "Ett BLOB-fält är inte tillåtet i partitioneringsfunktioner" +ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF + eng "A %-.64s need to include all fields in the partition function" + swe "En %-.64s behöver inkludera alla fält i partitioneringsfunktionen för denna lagringsmotor" +ER_NO_PARTS_ERROR + eng "Number of %-.64s = 0 is not an allowed value" + swe "Antal %-.64s = 0 är inte ett tillåten värde" +ER_PARTITION_MGMT_ON_NONPARTITIONED + eng "Partition management on a not partitioned table is not possible" + swe "Partitioneringskommando på en opartitionerad tabell är inte möjligt" +ER_FOREIGN_KEY_ON_PARTITIONED + eng "Foreign key condition is not yet supported in conjunction with partitioning" + swe "Foreign key villkor är inte ännu implementerad i kombination med partitionering" +ER_DROP_PARTITION_NON_EXISTENT + eng "Error in list of partitions to %-.64s" + swe "Fel i listan av partitioner att %-.64s" +ER_DROP_LAST_PARTITION + eng "Cannot remove all partitions, use DROP TABLE instead" + swe "Det är inte tillåtet att ta bort alla partitioner, använd DROP TABLE istället" +ER_COALESCE_ONLY_ON_HASH_PARTITION + eng "COALESCE PARTITION can only be used on HASH/KEY partitions" + swe "COALESCE PARTITION kan bara användas på HASH/KEY partitioner" 
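ER_RANGE_NOT_INCREASING_ERROR above states its invariant directly: each VALUES LESS THAN bound must be strictly greater than the previous partition's bound. A compact restatement of the invariant, independent of the server's structures (the helper name is invented):

#include <vector>

/*
  Illustrative check for the ER_RANGE_NOT_INCREASING_ERROR rule:
  bounds[] holds the VALUES LESS THAN limits in declaration order.
*/
static bool range_bounds_strictly_increasing(const std::vector<long long> &bounds)
{
  for (size_t i= 1; i < bounds.size(); i++)
    if (bounds[i] <= bounds[i - 1])
      return false;    /* would raise ER_RANGE_NOT_INCREASING_ERROR */
  return true;
}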
+ER_REORG_HASH_ONLY_ON_SAME_NO + eng "REORGANISE PARTITION can only be used to reorganise partitions not to change their numbers" + swe "REORGANISE PARTITION kan bara användas för att omorganisera partitioner, inte för att ändra deras antal" +ER_REORG_NO_PARAM_ERROR + eng "REORGANISE PARTITION without parameters can only be used on auto-partitioned tables using HASH PARTITIONs" + swe "REORGANISE PARTITION utan parametrar kan bara användas på auto-partitionerade tabeller som använder HASH partitionering" +ER_ONLY_ON_RANGE_LIST_PARTITION + eng "%-.64s PARTITION can only be used on RANGE/LIST partitions" + swe "%-.64s PARTITION kan bara användas på RANGE/LIST-partitioner" +ER_ADD_PARTITION_SUBPART_ERROR + eng "Trying to Add partition(s) with wrong number of subpartitions" + swe "ADD PARTITION med fel antal subpartitioner" +ER_ADD_PARTITION_NO_NEW_PARTITION + eng "At least one partition must be added" + swe "Åtminstone en partition måste läggas till vid ADD PARTITION" +ER_COALESCE_PARTITION_NO_PARTITION + eng "At least one partition must be coalesced" + swe "Åtminstone en partition måste slås ihop vid COALESCE PARTITION" +ER_REORG_PARTITION_NOT_EXIST + eng "More partitions to reorganise than there are partitions" + swe "Fler partitioner att reorganisera än det finns partitioner" +ER_SAME_NAME_PARTITION + eng "Duplicate partition name %-.64s" + swe "Duplicerat partitionsnamn %-.64s" +ER_NO_BINLOG_ERROR + eng "It is not allowed to shut off binlog on this command" + swe "Det är inte tillåtet att stänga av binlog på detta kommando" +ER_CONSECUTIVE_REORG_PARTITIONS + eng "When reorganising a set of partitions they must be in consecutive order" + swe "När ett antal partitioner omorganiseras måste de vara i konsekutiv ordning" +ER_REORG_OUTSIDE_RANGE + eng "Reorganize of range partitions cannot change total ranges except for last partition where it can extend the range" + swe "Reorganisering av rangepartitioner kan inte ändra den totala intervallet utom för den sista partitionen där intervallet kan utökas" +ER_PARTITION_FUNCTION_FAILURE + eng "Partition function not supported in this version for this handler" +ER_PART_STATE_ERROR + eng "Partition state cannot be defined from CREATE/ALTER TABLE" + swe "Partition state kan inte definieras från CREATE/ALTER TABLE" +ER_LIMITED_PART_RANGE + eng "The %-.64s handler only supports 32 bit integers in VALUES" + swe "%-.64s stödjer endast 32 bitar i integers i VALUES" +ER_PLUGIN_IS_NOT_LOADED + eng "Plugin '%-.64s' is not loaded" +ER_WRONG_VALUE + eng "Incorrect %-.32s value: '%-.128s'" +ER_NO_PARTITION_FOR_GIVEN_VALUE + eng "Table has no partition for value %-.64s" +ER_FILEGROUP_OPTION_ONLY_ONCE + eng "It is not allowed to specify %s more than once" +ER_CREATE_FILEGROUP_FAILED + eng "Failed to create %s" +ER_DROP_FILEGROUP_FAILED + eng "Failed to drop %s" +ER_TABLESPACE_AUTO_EXTEND_ERROR + eng "The handler doesn't support autoextend of tablespaces" +ER_WRONG_SIZE_NUMBER + eng "A size parameter was incorrectly specified, either number or on the form 10M" +ER_SIZE_OVERFLOW_ERROR + eng "The size number was correct but we don't allow the digit part to be more than 2 billion" +ER_ALTER_FILEGROUP_FAILED + eng "Failed to alter: %s" +ER_BINLOG_ROW_LOGGING_FAILED + eng "Writing one row to the row-based binary log failed" +ER_BINLOG_ROW_WRONG_TABLE_DEF + eng "Table definition on master and slave does not match" +ER_BINLOG_ROW_RBR_TO_SBR + eng "Slave running with --log-slave-updates must use row-based binary logging to be able to replicate row-based binary log events" 
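ER_WRONG_SIZE_NUMBER and ER_SIZE_OVERFLOW_ERROR above describe the size syntax for the new filegroup/tablespace DDL: a bare number or one "on the form 10M", with the digit part capped at 2 billion. The sketch below is a guess at that grammar as a standalone helper; the accepted suffix set (K/M/G) and the function name are assumptions, not taken from the patch:

#include <cctype>
#include <cstdlib>

/* Illustrative parser for "<digits>[K|M|G]"; not the server's code. */
static bool parse_size(const char *s, unsigned long long *out)
{
  char *end;
  unsigned long long n= strtoull(s, &end, 10);
  if (end == s || n > 2000000000ULL)   /* ER_SIZE_OVERFLOW_ERROR */
    return false;
  unsigned long long mult= 1;
  switch (toupper((unsigned char) *end))
  {
  case '\0': break;
  case 'K': mult= 1024ULL; end++; break;
  case 'M': mult= 1024ULL * 1024; end++; break;
  case 'G': mult= 1024ULL * 1024 * 1024; end++; break;
  default: return false;               /* ER_WRONG_SIZE_NUMBER */
  }
  if (*end != '\0')
    return false;
  *out= n * mult;
  return true;
}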
+ER_EVENT_ALREADY_EXISTS + eng "Event '%-.64s' already exists" +ER_EVENT_STORE_FAILED + eng "Failed to store event %s. Error code %d from storage engine." +ER_EVENT_DOES_NOT_EXIST + eng "Unknown event '%-.64s'" +ER_EVENT_CANT_ALTER + eng "Failed to alter event '%-.64s'" +ER_EVENT_DROP_FAILED + eng "Failed to drop %s" +ER_EVENT_INTERVAL_NOT_POSITIVE_OR_TOO_BIG + eng "INTERVAL is either not positive or too big" +ER_EVENT_ENDS_BEFORE_STARTS + eng "ENDS is either invalid or before STARTS" +ER_EVENT_EXEC_TIME_IN_THE_PAST + eng "Activation (AT) time is in the past" +ER_EVENT_OPEN_TABLE_FAILED + eng "Failed to open mysql.event" +ER_EVENT_NEITHER_M_EXPR_NOR_M_AT + eng "No datetime expression provided" +ER_COL_COUNT_DOESNT_MATCH_CORRUPTED + eng "Column count of mysql.%s is wrong. Expected %d, found %d. Table probably corrupted" +ER_CANNOT_LOAD_FROM_TABLE + eng "Cannot load from mysql.%s. Table probably corrupted. See error log." +ER_EVENT_CANNOT_DELETE + eng "Failed to delete the event from mysql.event" +ER_EVENT_COMPILE_ERROR + eng "Error during compilation of event's body" +ER_EVENT_SAME_NAME + eng "Same old and new event name" +ER_EVENT_DATA_TOO_LONG + eng "Data for column '%s' too long" +ER_DROP_INDEX_FK + eng "Cannot drop index '%-.64s': needed in a foreign key constraint" + ger "Kann Index '%-.64s' nicht löschen: wird für einen Fremdschlüssel benötigt" +ER_CANT_WRITE_LOCK_LOG_TABLE + eng "You can't write-lock a log table. Only read access is possible." +ER_CANT_READ_LOCK_LOG_TABLE + eng "You can't use usual read lock with log tables. Try READ LOCAL instead." ER_SP_WRONG_NAME 42000 eng "Incorrect routine name '%-.64s'" +ER_FOREIGN_DUPLICATE_KEY 23000 S1009 + eng "Upholding foreign key constraints for table '%.64s', entry '%-.64s', key %d would lead to a duplicate entry" +ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE + eng "Column count of mysql.%s is wrong. Expected %d, found %d. Created with MySQL %d, now running %d. Please use scripts/mysql_fix_privilege_tables" ER_TABLE_NEEDS_UPGRADE eng "Table upgrade required. Please do \"REPAIR TABLE `%-.32s`\" to fix it!" +ER_ILLEGAL_HA_CREATE_OPTION + eng "Table storage engine '%-.64s' does not support the create option '%.64s'" +ER_CANT_CHANGE_TX_ISOLATION 25001 + eng "Transaction isolation level can't be changed while a transaction is in progress" +ER_WARN_DEPRECATED + eng "The syntax '%s' is deprecated and will be removed in MySQL %s. Please use %s instead." 
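ER_CANT_CHANGE_TX_ISOLATION above pairs with the check_tx_isolation() hook added to set_var.cc earlier in this patch: the one-shot form of SET TRANSACTION ISOLATION LEVEL (var->type == OPT_DEFAULT) is refused while a transaction is open. A condensed model of that check, with THD and set_var reduced to the two fields it reads (SessionState and the function name are stand-ins):

/* Condensed model of check_tx_isolation(); illustrative only. */
enum enum_var_type { OPT_DEFAULT= 0, OPT_SESSION, OPT_GLOBAL };

struct SessionState
{
  bool in_transaction;   /* stands in for SERVER_STATUS_IN_TRANS */
};

static int check_tx_isolation_model(const SessionState &thd,
                                    enum_var_type set_type)
{
  if (set_type == OPT_DEFAULT && thd.in_transaction)
    return 1;   /* my_error(ER_CANT_CHANGE_TX_ISOLATION, MYF(0)) */
  return 0;
}

SET SESSION and SET GLOBAL remain allowed mid-transaction because they only affect later transactions, which is why the guard tests OPT_DEFAULT alone.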
ER_SP_NO_AGGREGATE 42000 eng "AGGREGATE is not supported for stored functions" +ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR + eng "Cannot switch out of the row-based binary log format when the session has open temporary tables" +ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT + eng "Cannot change the binary logging format inside a stored function or trigger" +ER_NDB_CANT_SWITCH_BINLOG_FORMAT + eng "The NDB cluster engine does not support changing the binlog format on the fly yet" + diff --git a/sql/slave.cc b/sql/slave.cc index fa7ccc4427d..39656700e1c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -16,17 +16,21 @@ #include "mysql_priv.h" -#ifdef HAVE_REPLICATION - #include <mysql.h> #include <myisam.h> +#include "rpl_rli.h" #include "slave.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <thr_alarm.h> #include <my_dir.h> #include <sql_common.h> +#ifdef HAVE_REPLICATION + +#include "rpl_tblmap.h" + #define MAX_SLAVE_RETRY_PAUSE 5 bool use_slave_mask = 0; MY_BITMAP slave_error_mask; @@ -35,12 +39,7 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*); volatile bool slave_sql_running = 0, slave_io_running = 0; char* slave_load_tmpdir = 0; -MASTER_INFO *active_mi; -HASH replicate_do_table, replicate_ignore_table; -DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -bool do_table_inited = 0, ignore_table_inited = 0; -bool wild_do_table_inited = 0, wild_ignore_table_inited = 0; -bool table_rules_on= 0; +MASTER_INFO *active_mi= 0; my_bool replicate_same_server_id; ulonglong relay_log_space_limit = 0; @@ -52,8 +51,6 @@ ulonglong relay_log_space_limit = 0; */ int disconnect_slave_event_count = 0, abort_slave_event_count = 0; -int events_till_abort = -1; -static int events_till_disconnect = -1; typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE; @@ -195,20 +192,6 @@ err: } -static void free_table_ent(TABLE_RULE_ENT* e) -{ - my_free((gptr) e, MYF(0)); -} - - -static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, - my_bool not_used __attribute__((unused))) -{ - *len = e->key_len; - return (byte*)e->db; -} - - /* Open the given relay log @@ -531,7 +514,7 @@ void st_relay_log_info::close_temporary_tables() Don't ask for disk deletion. For now, anyway they will be deleted when slave restarts, but it is a better intention to not delete them. */ - close_temporary(table, 0); + close_temporary(table, 1, 0); } save_temporary_tables= 0; slave_open_temp_tables= 0; @@ -824,237 +807,6 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, } -void init_table_rule_hash(HASH* h, bool* h_inited) -{ - hash_init(h, system_charset_info,TABLE_RULE_HASH_SIZE,0,0, - (hash_get_key) get_table_key, - (hash_free_key) free_table_ent, 0); - *h_inited = 1; -} - - -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited) -{ - my_init_dynamic_array(a, sizeof(TABLE_RULE_ENT*), TABLE_RULE_ARR_SIZE, - TABLE_RULE_ARR_SIZE); - *a_inited = 1; -} - - -static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len) -{ - uint i; - const char* key_end = key + len; - - for (i = 0; i < a->elements; i++) - { - TABLE_RULE_ENT* e ; - get_dynamic(a, (gptr)&e, i); - if (!my_wildcmp(system_charset_info, key, key_end, - (const char*)e->db, - (const char*)(e->db + e->key_len), - '\\',wild_one,wild_many)) - return e; - } - - return 0; -} - - -/* - Checks whether tables match some (wild_)do_table and (wild_)ignore_table - rules (for replication) - - SYNOPSIS - tables_ok() - thd thread (SQL slave thread normally). Mustn't be null. 
- tables list of tables to check - - NOTES - Note that changing the order of the tables in the list can lead to - different results. Note also the order of precedence of the do/ignore - rules (see code below). For that reason, users should not set conflicting - rules because they may get unpredicted results (precedence order is - explained in the manual). - - Thought which arose from a question of a big customer "I want to include - all tables like "abc.%" except the "%.EFG"". This can't be done now. If we - supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/ - (I could not find an equivalent in the regex library MySQL uses). - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -bool tables_ok(THD* thd, TABLE_LIST* tables) -{ - bool some_tables_updating= 0; - DBUG_ENTER("tables_ok"); - - /* - In routine, can't reliably pick and choose substatements, so always - replicate. - We can't reliably know if one substatement should be executed or not: - consider the case of this substatement: a SELECT on a non-replicated - constant table; if we don't execute it maybe it was going to fill a - variable which was going to be used by the next substatement to update - a replicated table? If we execute it maybe the constant non-replicated - table does not exist (and so we'll fail) while there was no need to - execute this as this SELECT does not influence replicated tables in the - rest of the routine? In other words: users are used to replicate-*-table - specifying how to handle updates to tables, these options don't say - anything about reads to tables; we can't guess. - */ - if (thd->spcont) - DBUG_RETURN(1); - - for (; tables; tables= tables->next_global) - { - char hash_key[2*NAME_LEN+2]; - char *end; - uint len; - - if (!tables->updating) - continue; - some_tables_updating= 1; - end= strmov(hash_key, tables->db ? tables->db : thd->db); - *end++= '.'; - len= (uint) (strmov(end, tables->table_name) - hash_key); - if (do_table_inited) // if there are any do's - { - if (hash_search(&replicate_do_table, (byte*) hash_key, len)) - DBUG_RETURN(1); - } - if (ignore_table_inited) // if there are any ignores - { - if (hash_search(&replicate_ignore_table, (byte*) hash_key, len)) - DBUG_RETURN(0); - } - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - DBUG_RETURN(1); - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - DBUG_RETURN(0); - } - - /* - If no table was to be updated, ignore statement (no reason we play it on - slave, slave is supposed to replicate _changes_ only). - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - DBUG_RETURN(some_tables_updating && - !do_table_inited && !wild_do_table_inited); -} - - -/* - Checks whether a db matches wild_do_table and wild_ignore_table - rules (for replication) - - SYNOPSIS - db_ok_with_wild_table() - db name of the db to check. - Is tested with check_db_name() before calling this function. - - NOTES - Here is the reason for this function. - We advise users who want to exclude a database 'db1' safely to do it - with replicate_wild_ignore_table='db1.%' instead of binlog_ignore_db or - replicate_ignore_db because the two lasts only check for the selected db, - which won't work in that case: - USE db2; - UPDATE db1.t SET ... #this will be replicated and should not - whereas replicate_wild_ignore_table will work in all cases. 
- With replicate_wild_ignore_table, we only check tables. When - one does 'DROP DATABASE db1', tables are not involved and the - statement will be replicated, while users could expect it would not (as it - rougly means 'DROP db1.first_table, DROP db1.second_table...'). - In other words, we want to interpret 'db1.%' as "everything touching db1". - That is why we want to match 'db1' against 'db1.%' wild table rules. - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated - */ - -int db_ok_with_wild_table(const char *db) -{ - char hash_key[NAME_LEN+2]; - char *end; - int len; - end= strmov(hash_key, db); - *end++= '.'; - len= end - hash_key ; - if (wild_do_table_inited && find_wild(&replicate_wild_do_table, - hash_key, len)) - return 1; - if (wild_ignore_table_inited && find_wild(&replicate_wild_ignore_table, - hash_key, len)) - return 0; - - /* - If no explicit rule found and there was a do list, do not replicate. - If there was no do list, go ahead - */ - return !wild_do_table_inited; -} - - -int add_table_rule(HASH* h, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - // len is always > 0 because we know the there exists a '.' - uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - (void)my_hash_insert(h, (byte*)e); - return 0; -} - - -/* - Add table expression with wildcards to dynamic array -*/ - -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec) -{ - const char* dot = strchr(table_spec, '.'); - if (!dot) return 1; - uint len = (uint)strlen(table_spec); - TABLE_RULE_ENT* e = (TABLE_RULE_ENT*)my_malloc(sizeof(TABLE_RULE_ENT) - + len, MYF(MY_WME)); - if (!e) return 1; - e->db = (char*)e + sizeof(TABLE_RULE_ENT); - e->tbl_name = e->db + (dot - table_spec) + 1; - e->key_len = len; - memcpy(e->db, table_spec, len); - insert_dynamic(a, (gptr)&e); - return 0; -} - - -static void free_string_array(DYNAMIC_ARRAY *a) -{ - uint i; - for (i = 0; i < a->elements; i++) - { - char* p; - get_dynamic(a, (gptr) &p, i); - my_free(p, MYF(MY_WME)); - } - delete_dynamic(a); -} - - #ifdef NOT_USED_YET static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/) { @@ -1090,14 +842,6 @@ void end_slave() */ terminate_slave_threads(active_mi,SLAVE_FORCE_ALL); end_master_info(active_mi); - if (do_table_inited) - hash_free(&replicate_do_table); - if (ignore_table_inited) - hash_free(&replicate_ignore_table); - if (wild_do_table_inited) - free_string_array(&replicate_wild_do_table); - if (wild_ignore_table_inited) - free_string_array(&replicate_wild_ignore_table); delete active_mi; active_mi= 0; } @@ -1117,19 +861,48 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) { DBUG_ASSERT(rli->sql_thd == thd); DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun - return rli->abort_slave || abort_loop || thd->killed; + if (abort_loop || thd->killed || rli->abort_slave) + { + /* + If we are in an unsafe situation (stopping could corrupt replication), + we give one minute to the slave SQL thread of grace before really + terminating, in the hope that it will be able to read more events and + the unsafe situation will soon be left. Note that this one minute starts + from the last time anything happened in the slave SQL thread. 
So it's + really one minute of idleness, we don't timeout if the slave SQL thread + is actively working. + */ + if (!rli->unsafe_to_stop_at) + return 1; + DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving " + "it some grace period")); + if (difftime(time(0), rli->unsafe_to_stop_at) > 60) + { + slave_print_msg(ERROR_LEVEL, rli, 0, + "SQL thread had to stop in an unsafe situation, in " + "the middle of applying updates to a " + "non-transactional table without any primary key. " + "There is a risk of duplicate updates when the slave " + "SQL thread is restarted. Please check your tables' " + "contents after restart."); + return 1; + } + } + return 0; } /* - Writes an error message to rli->last_slave_error and rli->last_slave_errno - (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr. + Writes a message to stderr, and if it's an error message, to + rli->last_slave_error and rli->last_slave_errno (which will be displayed by + SHOW SLAVE STATUS). SYNOPSIS - slave_print_error() - rli + slave_print_msg() + level The severity level + rli err_code The error code - msg The error message (usually related to the error code, but can + msg The message (usually related to the error code, but can contain more information). ... (this is printf-like format, with % symbols in msg) @@ -1137,22 +910,47 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) void */ -void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) +void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli, + int err_code, const char* msg, ...) { + void (*report_function)(const char *, ...); + char buff[MAX_SLAVE_ERRMSG], *pbuff= buff; + uint pbuffsize= sizeof(buff); va_list args; va_start(args,msg); - my_vsnprintf(rli->last_slave_error, - sizeof(rli->last_slave_error), msg, args); - rli->last_slave_errno = err_code; - /* If the error string ends with '.', do not add a ',' it would be ugly */ - if (rli->last_slave_error[0] && - (*(strend(rli->last_slave_error)-1) == '.')) - sql_print_error("Slave: %s Error_code: %d", rli->last_slave_error, - err_code); + switch (level) + { + case ERROR_LEVEL: + /* + This my_error call only has effect in client threads. + Slave threads do nothing in my_error(). + */ + my_error(ER_UNKNOWN_ERROR, MYF(0), msg); + /* + It's an error, it must be reported in Last_error and Last_errno in SHOW + SLAVE STATUS. 
+ */ + pbuff= rli->last_slave_error; + pbuffsize= sizeof(rli->last_slave_error); + rli->last_slave_errno = err_code; + report_function= sql_print_error; + break; + case WARNING_LEVEL: + report_function= sql_print_warning; + break; + case INFORMATION_LEVEL: + report_function= sql_print_information; + break; + default: + DBUG_ASSERT(0); // should not come here + return; // don't crash production builds, just do nothing + } + my_vsnprintf(pbuff, pbuffsize, msg, args); + /* If the msg string ends with '.', do not add a ',' it would be ugly */ + if (pbuff[0] && (*(strend(pbuff)-1) == '.')) + (*report_function)("Slave: %s Error_code: %d", pbuff, err_code); else - sql_print_error("Slave: %s, Error_code: %d", rli->last_slave_error, - err_code); - + (*report_function)("Slave: %s, Error_code: %d", pbuff, err_code); } /* @@ -1176,25 +974,6 @@ bool net_request_file(NET* net, const char* fname) DBUG_RETURN(net_write_command(net, 251, fname, strlen(fname), "", 0)); } - -const char *rewrite_db(const char* db, uint32 *new_len) -{ - if (replicate_rewrite_db.is_empty() || !db) - return db; - I_List_iterator<i_string_pair> it(replicate_rewrite_db); - i_string_pair* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->key, db)) - { - *new_len= (uint32)strlen(tmp->val); - return tmp->val; - } - } - return db; -} - /* From other comments and tests in code, it looks like sometimes Query_log_event and Load_log_event can have db == 0 @@ -1207,61 +986,6 @@ const char *print_slave_db_safe(const char* db) return (db ? db : ""); } -/* - Checks whether a db matches some do_db and ignore_db rules - (for logging or replication) - - SYNOPSIS - db_ok() - db name of the db to check - do_list either binlog_do_db or replicate_do_db - ignore_list either binlog_ignore_db or replicate_ignore_db - - RETURN VALUES - 0 should not be logged/replicated - 1 should be logged/replicated -*/ - -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ) -{ - if (do_list.is_empty() && ignore_list.is_empty()) - return 1; // ok to replicate if the user puts no constraints - - /* - If the user has specified restrictions on which databases to replicate - and db was not selected, do not replicate. 
- */ - if (!db) - return 0; - - if (!do_list.is_empty()) // if the do's are not empty - { - I_List_iterator<i_string> it(do_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 1; // match - } - return 0; - } - else // there are some elements in the don't, otherwise we cannot get here - { - I_List_iterator<i_string> it(ignore_list); - i_string* tmp; - - while ((tmp=it++)) - { - if (!strcmp(tmp->ptr, db)) - return 0; // match - } - return 1; - } -} - - static int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, const char *default_val) { @@ -1625,7 +1349,7 @@ static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db, error=file->ha_repair(thd,&check_opt) != 0; thd->net.vio = save_vio; if (error) - my_error(ER_INDEX_REBUILD, MYF(0), tables.table->s->table_name); + my_error(ER_INDEX_REBUILD, MYF(0), tables.table->s->table_name.str); err: close_thread_tables(thd); @@ -1708,6 +1432,7 @@ static int init_relay_log_info(RELAY_LOG_INFO* rli, const char* msg = 0; int error = 0; DBUG_ENTER("init_relay_log_info"); + DBUG_ASSERT(!rli->no_storage); // Don't init if there is no storage if (rli->inited) // Set if this function called DBUG_RETURN(0); @@ -2003,7 +1728,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, MASTER_INFO *mi) if (rli->ign_master_log_name_end[0]) { DBUG_PRINT("info",("writing a Rotate event to track down ignored events")); - Rotate_log_event *ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end, 0, rli->ign_master_log_pos_end, Rotate_log_event::DUP_NAME); rli->ign_master_log_name_end[0]= 0; @@ -2338,48 +2063,6 @@ int register_slave_on_master(MYSQL* mysql) } -/* - Builds a String from a HASH of TABLE_RULE_ENT. Cannot be used for any other - hash, as it assumes that the hash entries are TABLE_RULE_ENT. - - SYNOPSIS - table_rule_ent_hash_to_str() - s pointer to the String to fill - h pointer to the HASH to read - - RETURN VALUES - none -*/ - -void table_rule_ent_hash_to_str(String* s, HASH* h) -{ - s->length(0); - for (uint i=0 ; i < h->records ; i++) - { - TABLE_RULE_ENT* e= (TABLE_RULE_ENT*) hash_element(h, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - -/* - Mostly the same thing as above -*/ - -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a) -{ - s->length(0); - for (uint i=0 ; i < a->elements ; i++) - { - TABLE_RULE_ENT* e; - get_dynamic(a, (gptr)&e, i); - if (s->length()) - s->append(','); - s->append(e->db,e->key_len); - } -} - bool show_master_info(THD* thd, MASTER_INFO* mi) { // TODO: fix this for multi-master @@ -2475,23 +2158,18 @@ bool show_master_info(THD* thd, MASTER_INFO* mi) protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ? "Yes" : "No", &my_charset_bin); protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin); - protocol->store(&replicate_do_db); - protocol->store(&replicate_ignore_db); - /* - We can't directly use some protocol->store for - replicate_*_table, - as Protocol doesn't know the TABLE_RULE_ENT struct. - We first build Strings and then pass them to protocol->store. 
- */ + protocol->store(rpl_filter->get_do_db()); + protocol->store(rpl_filter->get_ignore_db()); + char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); - table_rule_ent_hash_to_str(&tmp, &replicate_do_table); + rpl_filter->get_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_hash_to_str(&tmp, &replicate_ignore_table); + rpl_filter->get_ignore_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_do_table); + rpl_filter->get_wild_do_table(&tmp); protocol->store(&tmp); - table_rule_ent_dynamic_array_to_str(&tmp, &replicate_wild_ignore_table); + rpl_filter->get_wild_ignore_table(&tmp); protocol->store(&tmp); protocol->store((uint32) mi->rli.last_slave_errno); @@ -2623,17 +2301,17 @@ int flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache) st_relay_log_info::st_relay_log_info() - :info_fd(-1), cur_log_fd(-1), save_temporary_tables(0), + :no_storage(FALSE), info_fd(-1), cur_log_fd(-1), save_temporary_tables(0), cur_log_old_open_count(0), group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0), last_master_timestamp(0), slave_skip_counter(0), abort_pos_wait(0), slave_run_id(0), sql_thd(0), last_slave_errno(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), - until_log_pos(0), retried_trans(0) + until_log_pos(0), retried_trans(0), m_reload_flags(RELOAD_NONE_F), + unsafe_to_stop_at(0) { group_relay_log_name[0]= event_relay_log_name[0]= group_master_log_name[0]= 0; last_slave_error[0]= until_log_name[0]= ign_master_log_name_end[0]= 0; - bzero((char*) &info_file, sizeof(info_file)); bzero((char*) &cache_buf, sizeof(cache_buf)); cached_charset_invalidate(); @@ -3053,11 +2731,9 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings) /* my_real_read() will time us out We check if we were told to die, and if not, try reading again - - TODO: Move 'events_till_disconnect' to the MASTER_INFO structure */ #ifndef DBUG_OFF - if (disconnect_slave_event_count && !(events_till_disconnect--)) + if (disconnect_slave_event_count && !(mi->events_till_disconnect--)) return packet_error; #endif @@ -3215,6 +2891,47 @@ bool st_relay_log_info::cached_charset_compare(char *charset) return 0; } +/* + Check if the current error is of a temporary nature or not. + Some errors are temporary in nature, such as + ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT. Ndb also signals + that the error is temporary by pushing a warning with the error code + ER_GET_TEMPORARY_ERRMSG, if the originating error is temporary.
+*/ +static int has_temporary_error(THD *thd) +{ + if (thd->is_fatal_error) + return 0; + + /* + Temporary error codes: + currently, a deadlock detected by InnoDB or a lock + wait timeout (innodb_lock_wait_timeout exceeded) + */ + if (thd->net.last_errno == ER_LOCK_DEADLOCK || + thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) + return 1; + +#ifdef HAVE_NDB_BINLOG + /* + currently, a temporary error set by ndbcluster + */ + List_iterator_fast<MYSQL_ERROR> it(thd->warn_list); + MYSQL_ERROR *err; + while ((err= it++)) + { + DBUG_PRINT("info", ("has warning %d %s", err->code, err->msg)); + switch (err->code) + { + case ER_GET_TEMPORARY_ERRMSG: + return 1; + default: + break; + } + } +#endif + return 0; +} static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) { @@ -3332,8 +3049,9 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) thd->lex->current_select= 0; if (!ev->when) ev->when = time(NULL); - ev->thd = thd; + ev->thd = thd; // because up to this point, ev->thd == 0 exec_res = ev->exec_event(rli); + DBUG_PRINT("info", ("exec_event result = %d", exec_res)); DBUG_ASSERT(rli->sql_thd==thd); /* Format_description_log_event should not be deleted because it will be @@ -3347,17 +3065,13 @@ } if (slave_trans_retries) { - if (exec_res && - (thd->net.last_errno == ER_LOCK_DEADLOCK || - thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) && - !thd->is_fatal_error) + if (exec_res && has_temporary_error(thd)) { const char *errmsg; /* We were in a transaction which has been rolled back because of a - deadlock (currently, InnoDB deadlock detected by InnoDB) or lock - wait timeout (innodb_lock_wait_timeout exceeded); let's seek back to - BEGIN log event and retry it all again. + temporary error; + let's seek back to BEGIN log event and retry it all again. We have to not only seek but also a) init_master_info(), to seek back to hot relay log's start for later (for when we will come back to this hot log after re-processing the @@ -3404,7 +3118,7 @@ else { pthread_mutex_unlock(&rli->data_lock); - slave_print_error(rli, 0, "\ + slave_print_msg(ERROR_LEVEL, rli, 0, "\ Could not parse relay log event entry. The possible reasons are: the master's \ binary log is corrupted (you can check this by running 'mysqlbinlog' on the \ binary log), the slave's relay log is corrupted (you can check this by running \ @@ -3433,9 +3147,6 @@ pthread_handler_t handle_slave_io(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_io"); -#ifndef DBUG_OFF -slave_begin: -#endif DBUG_ASSERT(mi->inited); mysql= NULL ; retry_count= 0; @@ -3445,7 +3156,7 @@ slave_begin: mi->slave_run_id++; #ifndef DBUG_OFF - mi->events_till_abort = abort_slave_event_count; + mi->events_till_disconnect = disconnect_slave_event_count; #endif thd= new THD; // note that the constructor of THD uses DBUG_ !
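The new has_temporary_error()/exec_relay_log_event() pair above implements a classify-then-retry loop: a failed event group is re-executed only when the error is transient, and only up to slave_trans_retries attempts. A minimal standalone C++ sketch of that shape, where the error codes and the apply_group() callback are illustrative stand-ins rather than the server's real symbols:

// Sketch of the retry-on-temporary-error pattern used by the slave SQL
// thread; not the server code itself.
#include <cstdio>

enum ErrCode { OK_CODE = 0, LOCK_WAIT_TIMEOUT = 1205, LOCK_DEADLOCK = 1213 };

// Mirrors has_temporary_error(): fatal errors are never temporary;
// deadlocks and lock-wait timeouts are worth retrying.
static bool is_temporary_error(ErrCode e, bool fatal)
{
  if (fatal)
    return false;
  return e == LOCK_DEADLOCK || e == LOCK_WAIT_TIMEOUT;
}

// Hypothetical stand-in for re-executing the current event group:
// fails with a deadlock twice, then succeeds.
static ErrCode apply_group(int attempt)
{
  return attempt < 2 ? LOCK_DEADLOCK : OK_CODE;
}

int main()
{
  const int slave_trans_retries = 10;   // cf. the server option of that name
  int trans_retries = 0;
  ErrCode err;
  while ((err = apply_group(trans_retries)) != OK_CODE)
  {
    if (!is_temporary_error(err, false) ||
        ++trans_retries >= slave_trans_retries)
    {
      std::printf("giving up after error %d\n", err);
      return 1;
    }
    // the real code also seeks back to the BEGIN event and sleeps here
    std::printf("temporary error %d, retry %d\n", err, trans_retries);
  }
  std::printf("group applied after %d retries\n", trans_retries);
  return 0;
}

As in the hunks above, the retry count is per group: it is reset once the transaction finally succeeds, and a non-temporary error or an exhausted retry budget stops the SQL thread.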
@@ -3687,14 +3398,6 @@ ignore_log_space_limit=%d", log space"); goto err; } - // TODO: check debugging abort code -#ifndef DBUG_OFF - if (abort_slave_event_count && !--events_till_abort) - { - sql_print_error("Slave I/O thread: debugging abort"); - goto err; - } -#endif } } @@ -3733,10 +3436,6 @@ err: pthread_mutex_unlock(&LOCK_thread_count); pthread_cond_broadcast(&mi->stop_cond); // tell the world we are done pthread_mutex_unlock(&mi->run_lock); -#ifndef DBUG_OFF - if (abort_slave_event_count && !events_till_abort) - goto slave_begin; -#endif my_thread_end(); pthread_exit(0); DBUG_RETURN(0); // Can't return anything here @@ -3756,10 +3455,6 @@ pthread_handler_t handle_slave_sql(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_sql"); -#ifndef DBUG_OFF -slave_begin: -#endif - DBUG_ASSERT(rli->inited); pthread_mutex_lock(&rli->run_lock); DBUG_ASSERT(!rli->slave_running); @@ -3892,10 +3587,39 @@ Slave SQL thread aborted. Can't execute init_slave query"); { // do not scare the user if SQL thread was simply killed or stopped if (!sql_slave_killed(thd,rli)) + { + /* + retrieve as much info as possible from the thd: error codes and warnings, + and print this to the error log so as to allow the user to locate the error + */ + if (thd->net.last_errno != 0) + { + if (rli->last_slave_errno == 0) + { + slave_print_msg(ERROR_LEVEL, rli, thd->net.last_errno, + thd->net.last_error ? + thd->net.last_error : "<no message>"); + } + else if (rli->last_slave_errno != thd->net.last_errno) + { + sql_print_error("Slave (additional info): %s Error_code: %d", + thd->net.last_error ? + thd->net.last_error : "<no message>", + thd->net.last_errno); + } + } + + /* Print any warnings issued */ + List_iterator_fast<MYSQL_ERROR> it(thd->warn_list); + MYSQL_ERROR *err; + while ((err= it++)) + sql_print_warning("Slave: %s Error_code: %d",err->msg, err->code); + sql_print_error("\ Error running query, slave SQL thread aborted. Fix the problem, and restart \ the slave SQL thread with \"SLAVE START\". We stopped at log \ '%s' position %s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff)); + } goto err; } } @@ -3906,6 +3630,14 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff)); err: + + /* + Some events set some playgrounds, which won't be cleared because the thread + stops. Stopping of this thread may not be known to these events ("stop" + request is detected only by the present function, not by events), so we + must "proactively" clear playgrounds: + */ + rli->cleanup_context(thd, 1); VOID(pthread_mutex_lock(&LOCK_thread_count)); /* Some extra safety, which should not be needed (normally, event deletion @@ -3951,10 +3683,6 @@ the slave SQL thread with \"SLAVE START\". 
We stopped at log \ pthread_cond_broadcast(&rli->stop_cond); // tell the world we are done pthread_mutex_unlock(&rli->run_lock); -#ifndef DBUG_OFF // TODO: reconsider the code below - if (abort_slave_event_count && !rli->events_till_abort) - goto slave_begin; -#endif my_thread_end(); pthread_exit(0); DBUG_RETURN(0); // Can't return anything here @@ -3976,10 +3704,8 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev) if (unlikely(!cev->is_valid())) DBUG_RETURN(1); - /* - TODO: fix to honor table rules, not only db rules - */ - if (!db_ok(cev->db, replicate_do_db, replicate_ignore_db)) + + if (!rpl_filter->db_ok(cev->db)) { skip_load_data_infile(net); DBUG_RETURN(0); } @@ -4109,7 +3835,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) rotate event forever, so we need to not disconnect after one. */ if (disconnect_slave_event_count) - events_till_disconnect++; + mi->events_till_disconnect++; #endif /* @@ -4565,7 +4291,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, MASTER_INFO* mi, DBUG_ENTER("connect_to_master"); #ifndef DBUG_OFF - events_till_disconnect = disconnect_slave_event_count; + mi->events_till_disconnect = disconnect_slave_event_count; #endif ulong client_flag= CLIENT_REMEMBER_OPTIONS; if (opt_slave_compressed_protocol) @@ -4638,8 +4364,8 @@ replication resumed in log '%s' at position %s", mi->user, else { change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE); - mysql_log.write(thd, COM_CONNECT_OUT, "%s@%s:%d", - mi->user, mi->host, mi->port); + general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d", + mi->user, mi->host, mi->port); } #ifdef SIGNAL_WITH_VIO_CLOSE thd->set_active_vio(mysql->net.vio); @@ -4699,6 +4425,10 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, MASTER_INFO* mi, bool flush_relay_log_info(RELAY_LOG_INFO* rli) { bool error=0; + + if (unlikely(rli->no_storage)) + return 0; + IO_CACHE *file = &rli->info_file; char buff[FN_REFLEN*2+22*2+4], *pos; @@ -4715,6 +4445,7 @@ bool flush_relay_log_info(RELAY_LOG_INFO* rli) error=1; if (flush_io_cache(file)) error=1; + /* Flushing the relay log is done by the slave I/O thread */ return error; } @@ -4745,7 +4476,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg) } -Log_event* next_event(RELAY_LOG_INFO* rli) +static Log_event* next_event(RELAY_LOG_INFO* rli) { Log_event* ev; IO_CACHE* cur_log = rli->cur_log; @@ -4756,6 +4487,11 @@ Log_event* next_event(RELAY_LOG_INFO* rli) DBUG_ENTER("next_event"); DBUG_ASSERT(thd != 0); +#ifndef DBUG_OFF + if (abort_slave_event_count && !rli->events_till_abort--) + DBUG_RETURN(0); +#endif + /* For most operations we need to protect rli members with data_lock, so we assume calling function acquired this mutex for us and we will @@ -4877,7 +4613,7 @@ Log_event* next_event(RELAY_LOG_INFO* rli) { /* We generate and return a Rotate, to make our positions advance */ DBUG_PRINT("info",("seeing an ignored end segment")); - ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + ev= new Rotate_log_event(rli->ign_master_log_name_end, 0, rli->ign_master_log_pos_end, Rotate_log_event::DUP_NAME); rli->ign_master_log_name_end[0]= 0; @@ -5125,11 +4861,114 @@ end: DBUG_VOID_RETURN; } +/* + Some system tables need to be re-read by the MySQL server after it has + updated them; in statement-based replication, the GRANT and other commands + are sent verbatim to the slave which then reloads; in row-based replication, + changes to these tables are done through ordinary Rows binlog events, so + the master must add some flag 
for the slave to know it has to reload the tables. +*/ +struct st_reload_entry +{ + char const *table; + st_relay_log_info::enum_reload_flag flag; +}; + +/* + Sorted array of table names; please keep it sorted since we are + using bsearch() on it below. + */ +static st_reload_entry s_mysql_tables[] = +{ + { "columns_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "db", st_relay_log_info::RELOAD_ACCESS_F }, + { "host", st_relay_log_info::RELOAD_ACCESS_F }, + { "procs_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "tables_priv", st_relay_log_info::RELOAD_GRANT_F }, + { "user", st_relay_log_info::RELOAD_ACCESS_F } +}; + +static const my_size_t s_mysql_tables_size = + sizeof(s_mysql_tables)/sizeof(*s_mysql_tables); + +static int reload_entry_compare(const void *lhs, const void *rhs) +{ + const char *lstr = static_cast<const char *>(lhs); + const char *rstr = static_cast<const st_reload_entry*>(rhs)->table; + return strcmp(lstr, rstr); +} + +void st_relay_log_info::touching_table(char const* db, char const* table, + ulong table_id) +{ + if (strcmp(db,"mysql") == 0) + { +#if defined(HAVE_BSEARCH) && defined(HAVE_SIZE_T) + void *const ptr= bsearch(table, s_mysql_tables, + s_mysql_tables_size, + sizeof(*s_mysql_tables), reload_entry_compare); + st_reload_entry const *const entry= static_cast<st_reload_entry*>(ptr); +#else + /* + Fall back to a full scan; there are few rows anyway and updating the + "mysql" database is rare. Like bsearch(), leave entry NULL on a miss. + */ + st_reload_entry const *entry= NULL; + for (st_reload_entry const *p= s_mysql_tables ; p < s_mysql_tables + s_mysql_tables_size ; p++) + if (reload_entry_compare(table, p) == 0) + { entry= p; break; } +#endif + if (entry) + m_reload_flags|= entry->flag; + } +} + +void st_relay_log_info::transaction_end(THD* thd) +{ + if (m_reload_flags != RELOAD_NONE_F) + { + if (m_reload_flags & RELOAD_ACCESS_F) + acl_reload(thd); + + if (m_reload_flags & RELOAD_GRANT_F) + grant_reload(thd); + + m_reload_flags= RELOAD_NONE_F; + } +} + +#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) +void st_relay_log_info::cleanup_context(THD *thd, bool error) +{ + DBUG_ASSERT(sql_thd == thd); + /* + 1) Instances of Table_map_log_event, if ::exec_event() was called on them, + may have opened tables, which we cannot be sure have been closed (because + maybe the Rows_log_event has not been found or will not be, because slave + SQL thread is stopping, or relay log has a missing tail etc). So we close + all thread's tables. And so the table mappings have to be cancelled. + 2) Rows_log_event::exec_event() may even have started statements or + transactions on them, which we need to roll back in case of error. + 3) If finding a Format_description_log_event after a BEGIN, we also need + to roll back before continuing with the next events. + 4) So we need this "context cleanup" function. 
+ */ + if (error) + { + ha_autocommit_or_rollback(thd, 1); // if a "statement transaction" + end_trans(thd, ROLLBACK); // if a "real transaction" + } + m_table_map.clear_tables(); + close_thread_tables(thd); + unsafe_to_stop_at= 0; +} +#endif + #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION template class I_List_iterator<i_string>; template class I_List_iterator<i_string_pair>; #endif - #endif /* HAVE_REPLICATION */ + diff --git a/sql/slave.h b/sql/slave.h index 040ce4eaf85..0b77d7f7c4f 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -14,15 +14,19 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_REPLICATION - #ifndef SLAVE_H #define SLAVE_H -#include "mysql.h" +#ifdef HAVE_REPLICATION + +#include "log.h" #include "my_list.h" +#include "rpl_filter.h" +#include "rpl_tblmap.h" +#include "rpl_rli.h" + #define SLAVE_NET_TIMEOUT 3600 -#define MAX_SLAVE_ERRMSG 1024 + #define MAX_SLAVE_ERROR 2000 /***************************************************************************** @@ -108,265 +112,7 @@ struct st_master_info; #define MYSQL_SLAVE_RUN_NOT_CONNECT 1 #define MYSQL_SLAVE_RUN_CONNECT 2 -/**************************************************************************** - - Replication SQL Thread - - st_relay_log_info contains: - - the current relay log - - the current relay log offset - - master log name - - master log sequence corresponding to the last update - - misc information specific to the SQL thread - - st_relay_log_info is initialized from the slave.info file if such exists. - Otherwise, data members are intialized with defaults. The initialization is - done with init_relay_log_info() call. - - The format of slave.info file: - - relay_log_name - relay_log_pos - master_log_name - master_log_pos - - To clean up, call end_relay_log_info() - -*****************************************************************************/ - -typedef struct st_relay_log_info -{ - /*** The following variables can only be read when protect by data lock ****/ - - /* - info_fd - file descriptor of the info file. set only during - initialization or clean up - safe to read anytime - cur_log_fd - file descriptor of the current read relay log - */ - File info_fd,cur_log_fd; - - /* - Protected with internal locks. - Must get data_lock when resetting the logs. - */ - MYSQL_LOG relay_log; - LOG_INFO linfo; - IO_CACHE cache_buf,*cur_log; - - /* The following variables are safe to read any time */ - - /* IO_CACHE of the info file - set only during init or end */ - IO_CACHE info_file; - - /* - When we restart slave thread we need to have access to the previously - created temporary tables. Modified only on init/end and by the SQL - thread, read only by SQL thread. - */ - TABLE *save_temporary_tables; - - /* - standard lock acquistion order to avoid deadlocks: - run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index - */ - pthread_mutex_t data_lock,run_lock; - - /* - start_cond is broadcast when SQL thread is started - stop_cond - when stopped - data_cond - when data protected by data_lock changes - */ - pthread_cond_t start_cond, stop_cond, data_cond; - - /* parent master info structure */ - struct st_master_info *mi; - - /* - Needed to deal properly with cur_log getting closed and re-opened with - a different log under our feet - */ - uint32 cur_log_old_open_count; - - /* - Let's call a group (of events) : - - a transaction - or - - an autocommiting query + its associated events (INSERT_ID, - TIMESTAMP...) 
- We need these rli coordinates : - - relay log name and position of the beginning of the group we currently are - executing. Needed to know where we have to restart when replication has - stopped in the middle of a group (which has been rolled back by the slave). - - relay log name and position just after the event we have just - executed. This event is part of the current group. - Formerly we only had the immediately above coordinates, plus a 'pending' - variable, but this dealt wrong with the case of a transaction starting on a - relay log and finishing (commiting) on another relay log. Case which can - happen when, for example, the relay log gets rotated because of - max_binlog_size. - */ - char group_relay_log_name[FN_REFLEN]; - ulonglong group_relay_log_pos; - char event_relay_log_name[FN_REFLEN]; - ulonglong event_relay_log_pos; - ulonglong future_event_relay_log_pos; - - /* - Original log name and position of the group we're currently executing - (whose coordinates are group_relay_log_name/pos in the relay log) - in the master's binlog. These concern the *group*, because in the master's - binlog the log_pos that comes with each event is the position of the - beginning of the group. - */ - char group_master_log_name[FN_REFLEN]; - volatile my_off_t group_master_log_pos; - - /* - Handling of the relay_log_space_limit optional constraint. - ignore_log_space_limit is used to resolve a deadlock between I/O and SQL - threads, the SQL thread sets it to unblock the I/O thread and make it - temporarily forget about the constraint. - */ - ulonglong log_space_limit,log_space_total; - bool ignore_log_space_limit; - - /* - When it commits, InnoDB internally stores the master log position it has - processed so far; the position to store is the one of the end of the - committing event (the COMMIT query event, or the event if in autocommit - mode). - */ -#if MYSQL_VERSION_ID < 40100 - ulonglong future_master_log_pos; -#else - ulonglong future_group_master_log_pos; -#endif - - time_t last_master_timestamp; - - /* - Needed for problems when slave stops and we want to restart it - skipping one or more events in the master log that have caused - errors, and have been manually applied by DBA already. - */ - volatile uint32 slave_skip_counter; - volatile ulong abort_pos_wait; /* Incremented on change master */ - volatile ulong slave_run_id; /* Incremented on slave start */ - pthread_mutex_t log_space_lock; - pthread_cond_t log_space_cond; - THD * sql_thd; - int last_slave_errno; -#ifndef DBUG_OFF - int events_till_abort; -#endif - char last_slave_error[MAX_SLAVE_ERRMSG]; - - /* if not set, the value of other members of the structure are undefined */ - bool inited; - volatile bool abort_slave; - volatile uint slave_running; - - /* - Condition and its parameters from START SLAVE UNTIL clause. - - UNTIL condition is tested with is_until_satisfied() method that is - called by exec_relay_log_event(). is_until_satisfied() caches the result - of the comparison of log names because log names don't change very often; - this cache is invalidated by parts of code which change log names with - notify_*_log_name_updated() methods. (They need to be called only if SQL - thread is running). 
- */ - - enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition; - char until_log_name[FN_REFLEN]; - ulonglong until_log_pos; - /* extension extracted from log_name and converted to int */ - ulong until_log_name_extension; - /* - Cached result of comparison of until_log_name and current log name - -2 means unitialised, -1,0,1 are comarison results - */ - enum - { - UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, - UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 - } until_log_names_cmp_result; - - char cached_charset[6]; - /* - trans_retries varies between 0 to slave_transaction_retries and counts how - many times the slave has retried the present transaction; gets reset to 0 - when the transaction finally succeeds. retried_trans is a cumulative - counter: how many times the slave has retried a transaction (any) since - slave started. - */ - ulong trans_retries, retried_trans; - - /* - If the end of the hot relay log is made of master's events ignored by the - slave I/O thread, these two keep track of the coords (in the master's - binlog) of the last of these events seen by the slave I/O thread. If not, - ign_master_log_name_end[0] == 0. - As they are like a Rotate event read/written from/to the relay log, they - are both protected by rli->relay_log.LOCK_log. - */ - char ign_master_log_name_end[FN_REFLEN]; - ulonglong ign_master_log_pos_end; - - st_relay_log_info(); - ~st_relay_log_info(); - - /* - Invalidate cached until_log_name and group_relay_log_name comparison - result. Should be called after any update of group_realy_log_name if - there chances that sql_thread is running. - */ - inline void notify_group_relay_log_name_update() - { - if (until_condition==UNTIL_RELAY_POS) - until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; - } - - /* - The same as previous but for group_master_log_name. - */ - inline void notify_group_master_log_name_update() - { - if (until_condition==UNTIL_MASTER_POS) - until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; - } - - inline void inc_event_relay_log_pos() - { - event_relay_log_pos= future_event_relay_log_pos; - } - - void inc_group_relay_log_pos(ulonglong log_pos, - bool skip_lock=0); - - int wait_for_pos(THD* thd, String* log_name, longlong log_pos, - longlong timeout); - void close_temporary_tables(); - - /* Check if UNTIL condition is satisfied. See slave.cc for more. */ - bool is_until_satisfied(); - inline ulonglong until_pos() - { - return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos : - group_relay_log_pos); - } - /* - Last charset (6 bytes) seen by slave SQL thread is cached here; it helps - the thread save 3 get_charset() per Query_log_event if the charset is not - changing from event to event (common situation). - When the 6 bytes are equal to 0 is used to mean "cache is invalidated". 
- */ - void cached_charset_invalidate(); - bool cached_charset_compare(char *charset); -} RELAY_LOG_INFO; - - -Log_event* next_event(RELAY_LOG_INFO* rli); +static Log_event* next_event(RELAY_LOG_INFO* rli); /***************************************************************************** @@ -425,7 +171,7 @@ typedef struct st_master_info uint port; uint connect_retry; #ifndef DBUG_OFF - int events_till_abort; + int events_till_disconnect; #endif bool inited; volatile bool abort_slave; @@ -472,26 +218,11 @@ typedef struct st_master_info int queue_event(MASTER_INFO* mi,const char* buf,ulong event_len); -typedef struct st_table_rule_ent -{ - char* db; - char* tbl_name; - uint key_len; -} TABLE_RULE_ENT; - -#define TABLE_RULE_HASH_SIZE 16 -#define TABLE_RULE_ARR_SIZE 16 -#define MAX_SLAVE_ERRMSG 1024 - #define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\ "FIRST") #define IO_RPL_LOG_NAME (mi->master_log_name[0] ? mi->master_log_name :\ "FIRST") -/* masks for start/stop operations on io and sql slave threads */ -#define SLAVE_IO 1 -#define SLAVE_SQL 2 - /* If the following is set, if first gives an error, second will be tried. Otherwise, if first fails, we fail. @@ -534,31 +265,14 @@ int mysql_table_dump(THD* thd, const char* db, int fetch_master_table(THD* thd, const char* db_name, const char* table_name, MASTER_INFO* mi, MYSQL* mysql, bool overwrite); -void table_rule_ent_hash_to_str(String* s, HASH* h); -void table_rule_ent_dynamic_array_to_str(String* s, DYNAMIC_ARRAY* a); bool show_master_info(THD* thd, MASTER_INFO* mi); bool show_binlog_info(THD* thd); -/* See if the query uses any tables that should not be replicated */ -bool tables_ok(THD* thd, TABLE_LIST* tables); - -/* - Check to see if the database is ok to operate on with respect to the - do and ignore lists - used in replication -*/ -int db_ok(const char* db, I_List<i_string> &do_list, - I_List<i_string> &ignore_list ); -int db_ok_with_wild_table(const char *db); - -int add_table_rule(HASH* h, const char* table_spec); -int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); -void init_table_rule_hash(HASH* h, bool* h_inited); -void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); -const char *rewrite_db(const char* db, uint32 *new_db_len); const char *print_slave_db_safe(const char *db); int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code); void skip_load_data_infile(NET* net); -void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...); +void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli, + int err_code, const char* msg, ...); void end_slave(); /* clean up */ void init_master_info_with_options(MASTER_INFO* mi); @@ -588,11 +302,6 @@ pthread_handler_t handle_slave_sql(void *arg); extern bool volatile abort_loop; extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */ extern LIST master_list; -extern HASH replicate_do_table, replicate_ignore_table; -extern DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table; -extern bool do_table_inited, ignore_table_inited, - wild_do_table_inited, wild_ignore_table_inited; -extern bool table_rules_on; extern my_bool replicate_same_server_id; extern int disconnect_slave_event_count, abort_slave_event_count ; @@ -607,12 +316,14 @@ extern my_bool master_ssl; extern my_string master_ssl_ca, master_ssl_capath, master_ssl_cert, master_ssl_cipher, master_ssl_key; -extern I_List<i_string> replicate_do_db, replicate_ignore_db; -extern I_List<i_string_pair> replicate_rewrite_db; 
extern I_List<THD> threads; -#endif -#else +#endif /* HAVE_REPLICATION */ + +/* masks for start/stop operations on io and sql slave threads */ #define SLAVE_IO 1 #define SLAVE_SQL 2 -#endif /* HAVE_REPLICATION */ + +#endif + + diff --git a/sql/sp.cc b/sql/sp.cc index ce0282bf810..481d81edfe4 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -14,7 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - #include "mysql_priv.h" #include "sp.h" #include "sp_head.h" @@ -466,10 +465,12 @@ static void sp_returns_type(THD *thd, String &result, sp_head *sp) { TABLE table; + TABLE_SHARE share; Field *field; - bzero(&table, sizeof(table)); + bzero((char*) &table, sizeof(table)); + bzero((char*) &share, sizeof(share)); table.in_use= thd; - table.s = &table.share_not_to_be_used; + table.s = &share; field= sp->create_result_field(0, 0, &table); field->sql_type(result); delete field; @@ -500,7 +501,7 @@ db_create_routine(THD *thd, int type, sp_head *sp) else { restore_record(table, s->default_values); // Get default values for fields - strxmov(definer, thd->security_ctx->priv_user, "@", + strxnmov(definer, sizeof(definer)-1, thd->security_ctx->priv_user, "@", thd->security_ctx->priv_host, NullS); if (table->s->fields != MYSQL_PROC_FIELD_COUNT) @@ -587,14 +588,14 @@ db_create_routine(THD *thd, int type, sp_head *sp) } ret= SP_OK; - if (table->file->write_row(table->record[0])) + if (table->file->ha_write_row(table->record[0])) ret= SP_WRITE_ROW_FAILED; else if (mysql_bin_log.is_open()) { thd->clear_error(); /* Such a statement can always go directly to binlog, no trans cache */ - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } } @@ -620,7 +621,7 @@ db_drop_routine(THD *thd, int type, sp_name *name) DBUG_RETURN(SP_OPEN_TABLE_FAILED); if ((ret= db_find_routine_aux(thd, type, name, table)) == SP_OK) { - if (table->file->delete_row(table->record[0])) + if (table->file->ha_delete_row(table->record[0])) ret= SP_DELETE_ROW_FAILED; } close_thread_tables(thd); @@ -655,7 +656,7 @@ db_update_routine(THD *thd, int type, sp_name *name, st_sp_chistics *chistics) table->field[MYSQL_PROC_FIELD_COMMENT]->store(chistics->comment.str, chistics->comment.length, system_charset_info); - if ((table->file->update_row(table->record[1],table->record[0]))) + if ((table->file->ha_update_row(table->record[1],table->record[0]))) ret= SP_WRITE_ROW_FAILED; } close_thread_tables(thd); @@ -816,7 +817,7 @@ db_show_routine_status(THD *thd, int type, const char *wild) } } - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if ((res= table->file->index_first(table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : SP_INTERNAL_ERROR; @@ -866,7 +867,7 @@ sp_drop_db_routines(THD *thd, char *db) goto err; ret= SP_OK; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); if (! table->file->index_read(table->record[0], key, keylen, HA_READ_KEY_EXACT)) { @@ -875,7 +876,7 @@ sp_drop_db_routines(THD *thd, char *db) do { - if (! table->file->delete_row(table->record[0])) + if (! 
table->file->ha_delete_row(table->record[0])) deleted= TRUE; /* We deleted something */ else { diff --git a/sql/sp_head.cc b/sql/sp_head.cc index f7572a374f1..d7490a5ac8e 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -180,11 +180,11 @@ sp_get_flags_for_command(LEX *lex) case SQLCOM_SHOW_ERRORS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_GRANTS: - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_ENGINE_MUTEX: case SQLCOM_SHOW_KEYS: - case SQLCOM_SHOW_LOGS: case SQLCOM_SHOW_MASTER_STAT: - case SQLCOM_SHOW_MUTEX_STATUS: case SQLCOM_SHOW_NEW_MASTER: case SQLCOM_SHOW_OPEN_TABLES: case SQLCOM_SHOW_PRIVILEGES: @@ -255,6 +255,9 @@ sp_get_flags_for_command(LEX *lex) case SQLCOM_ALTER_FUNCTION: case SQLCOM_DROP_PROCEDURE: case SQLCOM_DROP_FUNCTION: + case SQLCOM_CREATE_EVENT: + case SQLCOM_ALTER_EVENT: + case SQLCOM_DROP_EVENT: flags= sp_head::HAS_COMMIT_OR_ROLLBACK; break; default: @@ -314,6 +317,9 @@ sp_eval_expr(THD *thd, Field *result_field, Item *expr_item) { DBUG_ENTER("sp_eval_expr"); + if (!expr_item) + DBUG_RETURN(TRUE); + if (!(expr_item= sp_prepare_func_item(thd, &expr_item))) DBUG_RETURN(TRUE); @@ -497,7 +503,7 @@ void sp_head::init_strings(THD *thd, LEX *lex, sp_name *name) { DBUG_ENTER("sp_head::init_strings"); - uchar *endp; /* Used to trim the end */ + const uchar *endp; /* Used to trim the end */ /* During parsing, we must use thd->mem_root */ MEM_ROOT *root= thd->mem_root; @@ -698,7 +704,8 @@ sp_head::create_result_field(uint field_max_length, const char *field_name, field_length= !m_return_field_def.length ? field_max_length : m_return_field_def.length; - field= ::make_field((char*) 0, /* field ptr */ + field= ::make_field(table->s, /* TABLE_SHARE ptr */ + (char*) 0, /* field ptr */ field_length, /* field [max] length */ (uchar*) "", /* null ptr */ 0, /* null bit */ @@ -708,8 +715,10 @@ sp_head::create_result_field(uint field_max_length, const char *field_name, m_return_field_def.geom_type, Field::NONE, /* unreg check */ m_return_field_def.interval, - field_name ? field_name : (const char *) m_name.str, - table); + field_name ? field_name : (const char *) m_name.str); + + if (field) + field->init(table); DBUG_RETURN(field); } @@ -723,6 +732,9 @@ int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b) /* StoredRoutinesBinlogging + This paragraph applies only to statement-based binlogging. Row-based + binlogging does not need anything special like this. + Top-down overview: 1. Statements @@ -736,7 +748,7 @@ int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b) Statements that have is_update_query(stmt) == FALSE (e.g. SELECTs) are not written into binary log. Instead we catch function calls the statement makes and write it into binary log separately (see #3). - + 2. PROCEDURE calls CALL statements are not written into binary log. Instead @@ -757,7 +769,7 @@ int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b) function execution (grep for start_union_events and stop_union_events) If the answers are No and Yes, we write the function call into the binary - log as "SELECT spfunc(<param1value>, <param2value>, ...)". + log as "SELECT spfunc(<param1value>, <param2value>, ...)" 4. Miscellaneous issues. 
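Point 3 of the overview above is what sp_head::execute_function() implements in the next hunk: when only statement-based logging is active, the whole function invocation is written as a single SELECT built from the already-evaluated argument values. A rough standalone sketch of just that string-building step, where the identifier quoting and value rendering are simplified stand-ins for append_identifier() and sp_get_item_value():

// Sketch: build the "SELECT spfunc(...)" statement that a stored
// function call is logged as under statement-based binlogging.
#include <cstdio>
#include <string>
#include <vector>

static std::string binlog_select_for_call(const std::string &func,
                                          const std::vector<std::string> &args)
{
  std::string buf("SELECT `");
  buf += func;                  // real code quotes via append_identifier()
  buf += "`(";
  for (size_t i = 0; i < args.size(); i++)
  {
    if (i)
      buf += ',';
    if (args[i].empty())
      buf += "NULL";            // an empty string stands in for SQL NULL here
    else
      buf += args[i];           // real code renders via sp_get_item_value()
  }
  buf += ')';
  return buf;
}

int main()
{
  std::vector<std::string> args;
  args.push_back("1");
  args.push_back("'abc'");
  // prints: SELECT `spfunc`(1,'abc')
  std::printf("%s\n", binlog_select_for_call("spfunc", args).c_str());
  return 0;
}

The hunk that follows also shows why the union tracking matters: if writing this statement to the binlog fails while the function modified a transactional table (unioned_events_trans), the server can only push a warning that the change was not reflected in the binary log.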
@@ -1284,56 +1296,62 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount, thd->spcont= nctx; - binlog_save_options= thd->options; - need_binlog_call= mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG); + /* + If row-based binlogging, we don't need to binlog the function's call, let + each substatement be binlogged its way. + */ + need_binlog_call= mysql_bin_log.is_open() && + (thd->options & OPTION_BIN_LOG) && !thd->current_stmt_binlog_row_based; if (need_binlog_call) { reset_dynamic(&thd->user_var_events); mysql_bin_log.start_union_events(thd); + binlog_save_options= thd->options; + thd->options&= ~OPTION_BIN_LOG; } - - thd->options&= ~OPTION_BIN_LOG; + err_status= execute(thd); - thd->options= binlog_save_options; - - if (need_binlog_call) - mysql_bin_log.stop_union_events(thd); - if (need_binlog_call && thd->binlog_evt_union.unioned_events) + if (need_binlog_call) { - char buf[256]; - String bufstr(buf, sizeof(buf), &my_charset_bin); - bufstr.length(0); - bufstr.append(STRING_WITH_LEN("SELECT ")); - append_identifier(thd, &bufstr, m_name.str, m_name.length); - bufstr.append('('); - for (uint i=0; i < argcount; i++) + mysql_bin_log.stop_union_events(thd); + thd->options= binlog_save_options; + if (thd->binlog_evt_union.unioned_events) { - String str_value_holder; - String *str_value; - - if (i) - bufstr.append(','); + char buf[256]; + String bufstr(buf, sizeof(buf), &my_charset_bin); + bufstr.length(0); + bufstr.append(STRING_WITH_LEN("SELECT ")); + append_identifier(thd, &bufstr, m_name.str, m_name.length); + bufstr.append('('); + for (uint i=0; i < argcount; i++) + { + String str_value_holder; + String *str_value; - str_value= sp_get_item_value(param_values[i], &str_value_holder); + if (i) + bufstr.append(','); + + str_value= sp_get_item_value(param_values[i], &str_value_holder); - if (str_value) - bufstr.append(*str_value); - else - bufstr.append(STRING_WITH_LEN("NULL")); - } - bufstr.append(')'); - - Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(), - thd->binlog_evt_union.unioned_events_trans, FALSE); - if (mysql_bin_log.write(&qinfo) && - thd->binlog_evt_union.unioned_events_trans) - { - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Invoked ROUTINE modified a transactional table but MySQL " - "failed to reflect this change in the binary log"); + if (str_value) + bufstr.append(*str_value); + else + bufstr.append(STRING_WITH_LEN("NULL")); + } + bufstr.append(')'); + + Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(), + thd->binlog_evt_union.unioned_events_trans, FALSE); + if (mysql_bin_log.write(&qinfo) && + thd->binlog_evt_union.unioned_events_trans) + { + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, + "Invoked ROUTINE modified a transactional table but MySQL " + "failed to reflect this change in the binary log"); + } + reset_dynamic(&thd->user_var_events); } - reset_dynamic(&thd->user_var_events); } if (m_type == TYPE_ENUM_FUNCTION && !err_status) @@ -1394,6 +1412,8 @@ sp_head::execute_procedure(THD *thd, List<Item> *args) uint params = m_pcont->context_pvars(); sp_rcontext *save_spcont, *octx; sp_rcontext *nctx = NULL; + bool save_enable_slow_log= false; + bool save_log_general= false; DBUG_ENTER("sp_head::execute_procedure"); DBUG_PRINT("info", ("procedure %s", m_name.str)); @@ -1492,12 +1512,28 @@ sp_head::execute_procedure(THD *thd, List<Item> *args) DBUG_PRINT("info",(" %.*s: eval args done", m_name.length, m_name.str)); } - + if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log) + { + 
DBUG_PRINT("info", ("Disabling slow log for the execution")); + save_enable_slow_log= true; + thd->enable_slow_log= FALSE; + } + if (!(m_flags & LOG_GENERAL_LOG) && !(thd->options & OPTION_LOG_OFF)) + { + DBUG_PRINT("info", ("Disabling general log for the execution")); + save_log_general= true; + /* disable this bit */ + thd->options |= OPTION_LOG_OFF; + } thd->spcont= nctx; - + if (!err_status) err_status= execute(thd); + if (save_log_general) + thd->options &= ~OPTION_LOG_OFF; + if (save_enable_slow_log) + thd->enable_slow_log= true; /* In the case when we weren't able to employ reuse mechanism for OUT/INOUT paranmeters, we should reallocate memory. This @@ -2276,10 +2312,15 @@ sp_instr_stmt::execute(THD *thd, uint *nextp) (the order of query cache and subst_spvars calls is irrelevant because queries with SP vars can't be cached) */ + if (unlikely((thd->options & OPTION_LOG_OFF)==0)) + general_log_print(thd, COM_QUERY, "%s", thd->query); + if (query_cache_send_result_to_client(thd, thd->query, thd->query_length) <= 0) { res= m_lex_keeper.reset_lex_and_exec_core(thd, nextp, FALSE, this); + if (!res && unlikely(thd->enable_slow_log)) + log_slow_statement(thd); query_cache_end_of_result(thd); } else diff --git a/sql/sp_head.h b/sql/sp_head.h index a4dd68ee4a3..472e5343991 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -124,7 +124,9 @@ public: IS_INVOKED= 32, // Is set if this sp_head is being used HAS_SET_AUTOCOMMIT_STMT= 64,// Is set if a procedure with 'set autocommit' /* Is set if a procedure with COMMIT (implicit or explicit) | ROLLBACK */ - HAS_COMMIT_OR_ROLLBACK= 128 + HAS_COMMIT_OR_ROLLBACK= 128, + LOG_SLOW_STATEMENTS= 256, // Used by events + LOG_GENERAL_LOG= 512 // Used by events }; /* TYPE_ENUM_FUNCTION, TYPE_ENUM_PROCEDURE or TYPE_ENUM_TRIGGER */ @@ -133,8 +135,7 @@ public: create_field m_return_field_def; /* This is used for FUNCTIONs only. 
*/ - uchar *m_tmp_query; // Temporary pointer to sub query string - uint m_old_cmq; // Old CLIENT_MULTI_QUERIES value + const uchar *m_tmp_query; // Temporary pointer to sub query string st_sp_chistics *m_chistics; ulong m_sql_mode; // For SHOW CREATE and execution LEX_STRING m_qname; // db.name @@ -182,7 +183,7 @@ public: */ HASH m_sroutines; // Pointers set during parsing - uchar *m_param_begin, *m_param_end, *m_body_begin; + const uchar *m_param_begin, *m_param_end, *m_body_begin; /* Security context for stored routine which should be run under diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 88c3c86aacb..3c2d1328a57 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -27,16 +27,131 @@ #include "mysql_priv.h" #include "hash_filo.h" -#ifdef HAVE_REPLICATION -#include "sql_repl.h" //for tables_ok() -#endif #include <m_ctype.h> #include <stdarg.h> #include "sp_head.h" #include "sp.h" +time_t mysql_db_table_last_check= 0L; + +TABLE_FIELD_W_TYPE mysql_db_table_fields[MYSQL_DB_FIELD_COUNT] = { + { + {(char *) STRING_WITH_LEN("Host")}, + {(char *) STRING_WITH_LEN("char(60)")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("Db")}, + {(char *) STRING_WITH_LEN("char(64)")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("User")}, + {(char *) STRING_WITH_LEN("char(16)")}, + {NULL, 0} + }, + { + {(char *) STRING_WITH_LEN("Select_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Insert_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Update_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Delete_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Create_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Drop_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Grant_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("References_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Index_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Alter_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Create_tmp_table_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Lock_tables_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Create_view_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Show_view_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Create_routine_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Alter_routine_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, 
+ { + {(char *) STRING_WITH_LEN("Execute_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Event_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + }, + { + {(char *) STRING_WITH_LEN("Trigger_priv")}, + {(char *) STRING_WITH_LEN("enum('N','Y')")}, + {(char *) STRING_WITH_LEN("utf8")} + } +}; + + #ifndef NO_EMBEDDED_ACCESS_CHECKS +#define FIRST_NON_YN_FIELD 26 + class acl_entry :public hash_filo_element { public: @@ -356,6 +471,20 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables) if (table->s->fields <= 36 && (user.access & GRANT_ACL)) user.access|= CREATE_USER_ACL; + + /* + if it is pre 5.1.6 privilege table then map CREATE privilege on + CREATE|ALTER|DROP|EXECUTE EVENT + */ + if (table->s->fields <= 37 && (user.access & SUPER_ACL)) + user.access|= EVENT_ACL; + + /* + if it is pre 5.1.6 privilege then map TRIGGER privilege on CREATE. + */ + if (table->s->fields <= 38 && (user.access & SUPER_ACL)) + user.access|= TRIGGER_ACL; + user.sort= get_sort(2,user.host.hostname,user.user); user.hostname_length= (user.host.hostname ? (uint) strlen(user.host.hostname) : 0); @@ -430,14 +559,14 @@ static my_bool acl_load(THD *thd, TABLE_LIST *tables) while (!(read_record_info.read_record(&read_record_info))) { ACL_DB db; - update_hostname(&db.host,get_field(&mem, table->field[0])); - db.db=get_field(&mem, table->field[1]); + update_hostname(&db.host,get_field(&mem, table->field[MYSQL_DB_FIELD_HOST])); + db.db=get_field(&mem, table->field[MYSQL_DB_FIELD_DB]); if (!db.db) { sql_print_warning("Found an entry in the 'db' table with empty database name; Skipped"); continue; } - db.user=get_field(&mem, table->field[2]); + db.user=get_field(&mem, table->field[MYSQL_DB_FIELD_USER]); if (check_no_resolve && hostname_requires_resolving(db.host.hostname)) { sql_print_warning("'db' entry '%s %s@%s' " @@ -1438,7 +1567,7 @@ bool change_password(THD *thd, const char *host, const char *user, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -1446,7 +1575,7 @@ bool change_password(THD *thd, const char *host, const char *user, */ tables.updating= 1; /* Thanks to bzero, tables.next==0 */ - if (!tables_ok(thd, &tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, &tables))) DBUG_RETURN(0); } #endif @@ -1486,8 +1615,7 @@ bool change_password(THD *thd, const char *host, const char *user, acl_user->host.hostname ? 
acl_user->host.hostname : "", new_password)); thd->clear_error(); - Query_log_event qinfo(thd, buff, query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, buff, query_length, FALSE, FALSE); } end: close_thread_tables(thd); @@ -1662,7 +1790,7 @@ static bool update_user_table(THD *thd, TABLE *table, key_copy((byte *) user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, (byte *) user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1673,7 +1801,7 @@ static bool update_user_table(THD *thd, TABLE *table, } store_record(table,record[1]); table->field[2]->store(new_password, new_password_len, system_charset_info); - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: deadcode */ DBUG_RETURN(1); @@ -1752,7 +1880,7 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -1888,16 +2016,16 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo, We should NEVER delete from the user table, as a uses can still use mysqld even if he doesn't have any privileges in the user table! */ - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (cmp_record(table,record[1]) && - (error=table->file->update_row(table->record[1],table->record[0]))) + (error=table->file->ha_update_row(table->record[1],table->record[0]))) { // This should never happen table->file->print_error(error,MYF(0)); /* purecov: deadcode */ error= -1; /* purecov: deadcode */ goto end; /* purecov: deadcode */ } } - else if ((error=table->file->write_row(table->record[0]))) // insert + else if ((error=table->file->ha_write_row(table->record[0]))) // insert { // This should never happen if (error && error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) /* purecov: inspected */ @@ -1970,7 +2098,7 @@ static int replace_db_table(TABLE *table, const char *db, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0],0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2006,17 +2134,18 @@ static int replace_db_table(TABLE *table, const char *db, /* update old existing row */ if (rights) { - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); - if ((error=table->file->update_row(table->record[1],table->record[0]))) + table->file->ha_retrieve_all_cols(); + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) goto table_error; /* purecov: deadcode */ } else /* must have been a revoke of all privileges */ { - if ((error = table->file->delete_row(table->record[1]))) + if ((error = table->file->ha_delete_row(table->record[1]))) goto table_error; /* purecov: deadcode */ } } - else if (rights && (error=table->file->write_row(table->record[0]))) + else if (rights && (error=table->file->ha_write_row(table->record[0]))) { if 
(error && error != HA_ERR_FOUND_DUPP_KEY) /* purecov: inspected */ goto table_error; /* purecov: deadcode */ @@ -2185,7 +2314,7 @@ GRANT_TABLE::GRANT_TABLE(TABLE *form, TABLE *col_privs) key_copy(key, col_privs->record[0], col_privs->key_info, key_prefix_len); col_privs->field[4]->store("",0, &my_charset_latin1); - col_privs->file->ha_index_init(0); + col_privs->file->ha_index_init(0, 1); if (col_privs->file->index_read(col_privs->record[0], (byte*) key, key_prefix_len, HA_READ_KEY_EXACT)) @@ -2239,10 +2368,10 @@ void free_grant_table(GRANT_TABLE *grant_table) /* Search after a matching grant. Prefer exact grants before not exact ones */ static GRANT_NAME *name_hash_search(HASH *name_hash, - const char *host,const char* ip, - const char *db, - const char *user, const char *tname, - bool exact) + const char *host,const char* ip, + const char *db, + const char *user, const char *tname, + bool exact) { char helping [NAME_LEN*2+USERNAME_LENGTH+3]; uint len; @@ -2330,7 +2459,7 @@ static int replace_column_table(GRANT_TABLE *g_t, List_iterator <LEX_COLUMN> iter(columns); class LEX_COLUMN *column; - table->file->ha_index_init(0); + table->file->ha_index_init(0, 1); while ((column= iter++)) { ulong privileges= column->rights; @@ -2345,7 +2474,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read(table->record[0], user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2384,9 +2513,9 @@ static int replace_column_table(GRANT_TABLE *g_t, { GRANT_COLUMN *grant_column; if (privileges) - error=table->file->update_row(table->record[1],table->record[0]); + error=table->file->ha_update_row(table->record[1],table->record[0]); else - error=table->file->delete_row(table->record[1]); + error=table->file->ha_delete_row(table->record[1]); if (error) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ @@ -2401,7 +2530,7 @@ static int replace_column_table(GRANT_TABLE *g_t, else // new grant { GRANT_COLUMN *grant_column; - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ result= -1; /* purecov: inspected */ @@ -2423,7 +2552,7 @@ static int replace_column_table(GRANT_TABLE *g_t, key_copy(user_key, table->record[0], table->key_info, key_prefix_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read(table->record[0], user_key, key_prefix_length, HA_READ_KEY_EXACT)) @@ -2453,8 +2582,8 @@ static int replace_column_table(GRANT_TABLE *g_t, if (privileges) { int tmp_error; - if ((tmp_error=table->file->update_row(table->record[1], - table->record[0]))) + if ((tmp_error=table->file->ha_update_row(table->record[1], + table->record[0]))) { /* purecov: deadcode */ table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */ result= -1; /* purecov: deadcode */ @@ -2466,7 +2595,7 @@ static int replace_column_table(GRANT_TABLE *g_t, else { int tmp_error; - if ((tmp_error = table->file->delete_row(table->record[1]))) + if ((tmp_error = table->file->ha_delete_row(table->record[1]))) { /* purecov: deadcode */ table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */ result= -1; /* purecov: deadcode */ @@ -2522,7 +2651,7 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table, 
key_copy(user_key, table->record[0], table->key_info, table->key_info->key_length); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (table->file->index_read_idx(table->record[0], 0, user_key, table->key_info->key_length, HA_READ_KEY_EXACT)) @@ -2574,15 +2703,15 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table, { if (store_table_rights || store_col_rights) { - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) goto table_error; /* purecov: deadcode */ } - else if ((error = table->file->delete_row(table->record[1]))) + else if ((error = table->file->ha_delete_row(table->record[1]))) goto table_error; /* purecov: deadcode */ } else { - error=table->file->write_row(table->record[0]); + error=table->file->ha_write_row(table->record[0]); if (error && error != HA_ERR_FOUND_DUPP_KEY) goto table_error; /* purecov: deadcode */ } @@ -2691,15 +2820,15 @@ static int replace_routine_table(THD *thd, GRANT_NAME *grant_name, { if (store_proc_rights) { - if ((error=table->file->update_row(table->record[1],table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1],table->record[0]))) goto table_error; } - else if ((error= table->file->delete_row(table->record[1]))) + else if ((error= table->file->ha_delete_row(table->record[1]))) goto table_error; } else { - error=table->file->write_row(table->record[0]); + error=table->file->ha_write_row(table->record[0]); if (error && error != HA_ERR_FOUND_DUPP_KEY) goto table_error; } @@ -2800,9 +2929,10 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list, if (!(rights & CREATE_ACL)) { char buf[FN_REFLEN]; - sprintf(buf,"%s/%s/%s.frm",mysql_data_home, table_list->db, - table_list->table_name); - fn_format(buf,buf,"","",4+16+32); + build_table_filename(buf, sizeof(buf), table_list->db, + table_list->table_name, reg_ext); + fn_format(buf, buf, "", "", MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS | + MY_RETURN_REAL_PATH | MY_APPEND_EXT); if (access(buf,F_OK)) { my_error(ER_NO_SUCH_TABLE, MYF(0), table_list->db, table_list->alias); @@ -2843,14 +2973,15 @@ bool mysql_table_grant(THD *thd, TABLE_LIST *table_list, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. */ tables[0].updating= tables[1].updating= tables[2].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3050,14 +3181,14 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. 
*/ tables[0].updating= tables[1].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3137,6 +3268,16 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, } grant_option=TRUE; thd->mem_root= old_root; + /* + This flush is here only because there is code that writes rows to + system tables after executing a binlog_query(). + + TODO: Ensure that no writes are executed after a binlog_query() by + moving the writes to before calling binlog_query(). Then remove + this line (and add an assert inside send_ok() that checks that + everything is in a consistent state). + */ + thd->binlog_flush_pending_rows_event(true); rw_unlock(&LOCK_grant); if (!result && !no_error) send_ok(thd); @@ -3181,14 +3322,14 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list, GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. */ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into account in tests. */ tables[0].updating= tables[1].updating= 1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(FALSE); } #endif @@ -3332,8 +3473,8 @@ static my_bool grant_load(TABLE_LIST *tables) t_table = tables[0].table; c_table = tables[1].table; p_table= tables[2].table; - t_table->file->ha_index_init(0); - p_table->file->ha_index_init(0); + t_table->file->ha_index_init(0, 1); + p_table->file->ha_index_init(0, 1); if (!t_table->file->index_first(t_table->record[0])) { memex_ptr= &memex; @@ -3745,8 +3886,8 @@ bool check_column_grant_in_table_ref(THD *thd, TABLE_LIST * table_ref, /* Normal or temporary table. */ TABLE *table= table_ref->table; grant= &(table->grant); - db_name= table->s->db; - table_name= table->s->table_name; + db_name= table->s->db.str; + table_name= table->s->table_name.str; } if (grant->want_privilege) @@ -4053,13 +4194,13 @@ static const char *command_array[]= "ALTER", "SHOW DATABASES", "SUPER", "CREATE TEMPORARY TABLES", "LOCK TABLES", "EXECUTE", "REPLICATION SLAVE", "REPLICATION CLIENT", "CREATE VIEW", "SHOW VIEW", "CREATE ROUTINE", "ALTER ROUTINE", - "CREATE USER" + "CREATE USER", "EVENT", "TRIGGER" }; static uint command_lengths[]= { 6, 6, 6, 6, 6, 4, 6, 8, 7, 4, 5, 10, 5, 5, 14, 5, 23, 11, 7, 17, 18, 11, 9, - 14, 13, 11 + 14, 13, 11, 5, 7 }; @@ -4604,7 +4745,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables) GRANT and REVOKE are applied the slave in/exclusion rules as they are some kind of updates to the mysql.% tables. 
*/ - if (thd->slave_thread && table_rules_on) + if (thd->slave_thread && rpl_filter->is_on()) { /* The tables must be marked "updating" so that tables_ok() takes them into @@ -4612,7 +4753,7 @@ int open_grant_tables(THD *thd, TABLE_LIST *tables) */ tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=1; - if (!tables_ok(thd, tables)) + if (!(thd->spcont || rpl_filter->tables_ok(0, tables))) DBUG_RETURN(1); tables[0].updating=tables[1].updating=tables[2].updating= tables[3].updating=tables[4].updating=0;; @@ -4688,13 +4829,13 @@ static int modify_grant_table(TABLE *table, Field *host_field, system_charset_info); user_field->store(user_to->user.str, user_to->user.length, system_charset_info); - if ((error= table->file->update_row(table->record[1], table->record[0]))) + if ((error= table->file->ha_update_row(table->record[1], table->record[0]))) table->file->print_error(error, MYF(0)); } else { /* delete */ - if ((error=table->file->delete_row(table->record[0]))) + if ((error=table->file->ha_delete_row(table->record[0]))) table->file->print_error(error, MYF(0)); } @@ -4762,7 +4903,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, by the searched record, if it exists. */ DBUG_PRINT("info",("read table: '%s' search: '%s'@'%s'", - table->s->table_name, user_str, host_str)); + table->s->table_name.str, user_str, host_str)); host_field->store(host_str, user_from->host.length, system_charset_info); user_field->store(user_str, user_from->user.length, system_charset_info); @@ -4774,7 +4915,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, user_key, key_prefix_length, HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { table->file->print_error(error, MYF(0)); result= -1; @@ -4805,7 +4946,7 @@ static int handle_grant_table(TABLE_LIST *tables, uint table_no, bool drop, { #ifdef EXTRA_DEBUG DBUG_PRINT("info",("scan table: '%s' search: '%s'@'%s'", - table->s->table_name, user_str, host_str)); + table->s->table_name.str, user_str, host_str)); #endif while ((error= table->file->rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) @@ -5705,7 +5846,7 @@ void update_schema_privilege(TABLE *table, char *buff, const char* db, table->field[i++]->store(column, col_length, cs); table->field[i++]->store(priv, priv_length, cs); table->field[i]->store(is_grantable, strlen(is_grantable), cs); - table->file->write_row(table->record[0]); + table->file->ha_write_row(table->record[0]); } diff --git a/sql/sql_acl.h b/sql/sql_acl.h index c8fadb73b0c..8c64adbbece 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -14,6 +14,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "slave.h" // for tables_ok(), rpl_filter + #define SELECT_ACL (1L << 0) #define INSERT_ACL (1L << 1) #define UPDATE_ACL (1L << 2) @@ -40,27 +42,29 @@ #define CREATE_PROC_ACL (1L << 23) #define ALTER_PROC_ACL (1L << 24) #define CREATE_USER_ACL (1L << 25) +#define EVENT_ACL (1L << 26) +#define TRIGGER_ACL (1L << 27) /* don't forget to update 1. static struct show_privileges_st sys_privileges[] 2. static const char *command_array[] and static uint command_lengths[] 3. mysql_create_system_tables.sh, mysql_fix_privilege_tables.sql + and mysql-test/lib/init_db.sql 4. acl_init() or whatever - to define behaviour for old privilege tables 5. 
sql_yacc.yy - for GRANT/REVOKE to work */ #define EXTRA_ACL (1L << 29) #define NO_ACCESS (1L << 30) - #define DB_ACLS \ (UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \ LOCK_TABLES_ACL | EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | \ - CREATE_PROC_ACL | ALTER_PROC_ACL) + CREATE_PROC_ACL | ALTER_PROC_ACL | EVENT_ACL | TRIGGER_ACL) #define TABLE_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_VIEW_ACL | \ - SHOW_VIEW_ACL) + SHOW_VIEW_ACL | TRIGGER_ACL) #define COL_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | REFERENCES_ACL) @@ -77,7 +81,7 @@ REFERENCES_ACL | INDEX_ACL | ALTER_ACL | SHOW_DB_ACL | SUPER_ACL | \ CREATE_TMP_ACL | LOCK_TABLES_ACL | REPL_SLAVE_ACL | REPL_CLIENT_ACL | \ EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | CREATE_PROC_ACL | \ - ALTER_PROC_ACL | CREATE_USER_ACL) + ALTER_PROC_ACL | CREATE_USER_ACL | EVENT_ACL | TRIGGER_ACL) #define DEFAULT_CREATE_PROC_ACLS \ (ALTER_PROC_ACL | EXECUTE_ACL) @@ -95,26 +99,32 @@ #define DB_CHUNK3 (CREATE_VIEW_ACL | SHOW_VIEW_ACL | \ CREATE_PROC_ACL | ALTER_PROC_ACL ) #define DB_CHUNK4 (EXECUTE_ACL) +#define DB_CHUNK5 (EVENT_ACL | TRIGGER_ACL) #define fix_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) << 4) & DB_CHUNK1) | \ (((A) << 6) & DB_CHUNK2) | \ (((A) << 9) & DB_CHUNK3) | \ - (((A) << 2) & DB_CHUNK4)) + (((A) << 2) & DB_CHUNK4) | \ + (((A) << 9) & DB_CHUNK5)) #define get_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) & DB_CHUNK1) >> 4) | \ (((A) & DB_CHUNK2) >> 6) | \ (((A) & DB_CHUNK3) >> 9) | \ - (((A) & DB_CHUNK4) >> 2)) + (((A) & DB_CHUNK4) >> 2) | \ + (((A) & DB_CHUNK5) >> 9)) #define TBL_CHUNK0 DB_CHUNK0 #define TBL_CHUNK1 DB_CHUNK1 #define TBL_CHUNK2 (CREATE_VIEW_ACL | SHOW_VIEW_ACL) +#define TBL_CHUNK3 TRIGGER_ACL #define fix_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) << 4) & TBL_CHUNK1) | \ - (((A) << 11) & TBL_CHUNK2)) + (((A) << 11) & TBL_CHUNK2) | \ + (((A) << 15) & TBL_CHUNK3)) #define get_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) & TBL_CHUNK1) >> 4) | \ - (((A) & TBL_CHUNK2) >> 11)) + (((A) & TBL_CHUNK2) >> 11) | \ + (((A) & TBL_CHUNK3) >> 15)) #define fix_rights_for_column(A) (((A) & 7) | (((A) & ~7) << 8)) #define get_rights_for_column(A) (((A) & 7) | ((A) >> 8)) #define fix_rights_for_procedure(A) ((((A) << 18) & EXECUTE_ACL) | \ @@ -124,6 +134,36 @@ (((A) & ALTER_PROC_ACL) >> 23) | \ (((A) & GRANT_ACL) >> 8)) +enum mysql_db_table_field +{ + MYSQL_DB_FIELD_HOST = 0, + MYSQL_DB_FIELD_DB, + MYSQL_DB_FIELD_USER, + MYSQL_DB_FIELD_SELECT_PRIV, + MYSQL_DB_FIELD_INSERT_PRIV, + MYSQL_DB_FIELD_UPDATE_PRIV, + MYSQL_DB_FIELD_DELETE_PRIV, + MYSQL_DB_FIELD_CREATE_PRIV, + MYSQL_DB_FIELD_DROP_PRIV, + MYSQL_DB_FIELD_GRANT_PRIV, + MYSQL_DB_FIELD_REFERENCES_PRIV, + MYSQL_DB_FIELD_INDEX_PRIV, + MYSQL_DB_FIELD_ALTER_PRIV, + MYSQL_DB_FIELD_CREATE_TMP_TABLE_PRIV, + MYSQL_DB_FIELD_LOCK_TABLES_PRIV, + MYSQL_DB_FIELD_CREATE_VIEW_PRIV, + MYSQL_DB_FIELD_SHOW_VIEW_PRIV, + MYSQL_DB_FIELD_CREATE_ROUTINE_PRIV, + MYSQL_DB_FIELD_ALTER_ROUTINE_PRIV, + MYSQL_DB_FIELD_EXECUTE_PRIV, + MYSQL_DB_FIELD_EVENT_PRIV, + MYSQL_DB_FIELD_TRIGGER_PRIV, + MYSQL_DB_FIELD_COUNT +}; + +extern TABLE_FIELD_W_TYPE mysql_db_table_fields[]; +extern time_t mysql_db_table_last_check; + /* Classes */ struct acl_host_and_ip diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 2226ab926d1..26d81eccce6 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -31,26 +31,35
@@ TABLE *unused_tables; /* Used by mysql_test */ HASH open_cache; /* Used by mysql_test */ - -static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, - const char *name, const char *alias, - TABLE_LIST *table_list, MEM_ROOT *mem_root); +static HASH table_def_cache; +static TABLE_SHARE *oldest_unused_share, end_of_unused_share; +static pthread_mutex_t LOCK_table_share; +static bool table_def_inited= 0; + +static int open_unireg_entry(THD *thd, TABLE *entry, TABLE_LIST *table_list, + const char *alias, + char *cache_key, uint cache_key_length, + MEM_ROOT *mem_root); static void free_cache_entry(TABLE *entry); static void mysql_rm_tmp_tables(void); -static bool open_new_frm(THD *thd, const char *path, const char *alias, - const char *db, const char *table_name, +static bool open_new_frm(THD *thd, TABLE_SHARE *share, const char *alias, uint db_stat, uint prgflag, uint ha_open_flags, TABLE *outparam, TABLE_LIST *table_desc, MEM_ROOT *mem_root); +static void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, + bool send_refresh); +static bool reopen_table(TABLE *table); + extern "C" byte *table_cache_key(const byte *record,uint *length, my_bool not_used __attribute__((unused))) { TABLE *entry=(TABLE*) record; - *length= entry->s->key_length; - return (byte*) entry->s->table_cache_key; + *length= entry->s->table_cache_key.length; + return (byte*) entry->s->table_cache_key.str; } + bool table_cache_init(void) { mysql_rm_tmp_tables(); @@ -62,21 +71,25 @@ bool table_cache_init(void) void table_cache_free(void) { DBUG_ENTER("table_cache_free"); - close_cached_tables((THD*) 0,0,(TABLE_LIST*) 0); - if (!open_cache.records) // Safety first - hash_free(&open_cache); + if (table_def_inited) + { + close_cached_tables((THD*) 0,0,(TABLE_LIST*) 0); + if (!open_cache.records) // Safety first + hash_free(&open_cache); + } DBUG_VOID_RETURN; } -uint cached_tables(void) +uint cached_open_tables(void) { return open_cache.records; } + #ifdef EXTRA_DEBUG static void check_unused(void) { - uint count=0,idx=0; + uint count= 0, open_files= 0, idx= 0; TABLE *cur_link,*start_link; if ((start_link=cur_link=unused_tables)) @@ -100,17 +113,560 @@ static void check_unused(void) TABLE *entry=(TABLE*) hash_element(&open_cache,idx); if (!entry->in_use) count--; + if (entry->file) + open_files++; } if (count != 0) { DBUG_PRINT("error",("Unused_links doesn't match open_cache: diff: %d", /* purecov: inspected */ count)); /* purecov: inspected */ } + +#ifdef NOT_SAFE_FOR_REPAIR + /* + check that the open cache and table definition cache have the same number + of active tables + */ + count= 0; + for (idx=0 ; idx < table_def_cache.records ; idx++) + { + TABLE_SHARE *entry= (TABLE_SHARE*) hash_element(&table_def_cache,idx); + count+= entry->ref_count; + } + if (count != open_files) + { + DBUG_PRINT("error", ("table_def ref_count: %u open_cache: %u", + count, open_files)); + DBUG_ASSERT(count == open_files); + } +#endif } #else #define check_unused() #endif + +/* + Create a table cache key + + SYNOPSIS + create_table_def_key() + thd Thread handler + key Create key here (must be of size MAX_DBKEY_LENGTH) + table_list Table definition + tmp_table Set if table is a tmp table + + IMPLEMENTATION + The table cache_key is created from: + db_name + \0 + table_name + \0 + + if the table is a tmp table, we add the following to make each tmp table + unique on the slave: + + 4 bytes for master thread id + 4 bytes pseudo thread id + + RETURN + Length of key +*/ + +uint create_table_def_key(THD *thd, char *key, TABLE_LIST
*table_list, + bool tmp_table) +{ + uint key_length= (uint) (strmov(strmov(key, table_list->db)+1, + table_list->table_name)-key)+1; + if (tmp_table) + { + int4store(key + key_length, thd->server_id); + int4store(key + key_length + 4, thd->variables.pseudo_thread_id); + key_length+= TMP_TABLE_KEY_EXTRA; + } + return key_length; +} + + + +/***************************************************************************** + Functions to handle table definition cache (TABLE_SHARE) +*****************************************************************************/ + +extern "C" byte *table_def_key(const byte *record, uint *length, + my_bool not_used __attribute__((unused))) +{ + TABLE_SHARE *entry=(TABLE_SHARE*) record; + *length= entry->table_cache_key.length; + return (byte*) entry->table_cache_key.str; +} + + +static void table_def_free_entry(TABLE_SHARE *share) +{ + DBUG_ENTER("table_def_free_entry"); + if (share->prev) + { + /* remove from old_unused_share list */ + pthread_mutex_lock(&LOCK_table_share); + *share->prev= share->next; + share->next->prev= share->prev; + pthread_mutex_unlock(&LOCK_table_share); + } + free_table_share(share); + DBUG_VOID_RETURN; +} + + +bool table_def_init(void) +{ + table_def_inited= 1; + pthread_mutex_init(&LOCK_table_share, MY_MUTEX_INIT_FAST); + oldest_unused_share= &end_of_unused_share; + end_of_unused_share.prev= &oldest_unused_share; + + return hash_init(&table_def_cache, &my_charset_bin, table_def_size, + 0, 0, table_def_key, + (hash_free_key) table_def_free_entry, 0) != 0; +} + + +void table_def_free(void) +{ + DBUG_ENTER("table_def_free"); + if (table_def_inited) + { + table_def_inited= 0; + pthread_mutex_destroy(&LOCK_table_share); + hash_free(&table_def_cache); + } + DBUG_VOID_RETURN; +} + + +uint cached_table_definitions(void) +{ + return table_def_cache.records; +} + + +/* + Get TABLE_SHARE for a table. + + get_table_share() + thd Thread handler + table_list Table that should be opened + key Table cache key + key_length Length of key + db_flags Flags to open_table_def(): + OPEN_VIEW + error out: Error code from open_table_def() + + IMPLEMENTATION + Get a table definition from the table definition cache. + If it doesn't exist, create a new one from the table definition file. + + NOTES + We must have wrlock on LOCK_open when we come here + (To be changed later) + + RETURN + 0 Error + # Share for table +*/ + +TABLE_SHARE *get_table_share(THD *thd, TABLE_LIST *table_list, char *key, + uint key_length, uint db_flags, int *error) +{ + TABLE_SHARE *share; + DBUG_ENTER("get_table_share"); + + *error= 0; + + /* Read table definition from cache */ + if ((share= (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, + key_length))) + goto found; + + if (!(share= alloc_table_share(table_list, key, key_length))) + { +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + DBUG_RETURN(0); + } + +#ifdef NOT_YET + // We need a write lock to be able to add a new entry + pthread_mutex_unlock(&LOCK_open); + pthread_mutex_lock(&LOCK_open); + /* Check that another thread didn't insert the same table in between */ + if ((old_share= hash_search(&table_def_cache, (byte*) key, key_length))) + { + (void) pthread_mutex_lock(&share->mutex); + free_table_share(share); + share= old_share; + goto found; + } +#endif + + /* + Lock mutex to be able to read table definition from file without + conflicts + */ + (void) pthread_mutex_lock(&share->mutex); + + /* + We assign a new table id under the protection of the LOCK_open and + the share's own mutex.
We do this instead of creating a new mutex + and using it for the sole purpose of serializing accesses to a + static variable. We assign the id to the + share before inserting it into the table_def_cache to be really + sure that it cannot be read from the cache without having a table + id assigned. + + CAVEAT. This means that the table cannot be used for + binlogging/replication purposes, unless get_table_share() has been + called directly or indirectly. + */ + assign_new_table_id(share); + + if (my_hash_insert(&table_def_cache, (byte*) share)) + { +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); + (void) pthread_mutex_unlock(&share->mutex); +#endif + free_table_share(share); + DBUG_RETURN(0); // return error + } +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + if (open_table_def(thd, share, db_flags)) + { +#ifdef NOT_YET + /* + No such table or wrong table definition file + Lock first the table cache and then the mutex. + This will ensure that no other thread is using the share + structure. + */ + (void) pthread_mutex_unlock(&share->mutex); + (void) pthread_mutex_lock(&LOCK_open); + (void) pthread_mutex_lock(&share->mutex); +#endif + *error= share->error; + (void) hash_delete(&table_def_cache, (byte*) share); + DBUG_RETURN(0); + } + share->ref_count++; // Mark in use + DBUG_PRINT("exit", ("share: 0x%lx ref_count: %u", + (ulong) share, share->ref_count)); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(share); + +found: + /* + We found an existing table definition. Return it if we didn't get + an error when reading the table definition from file. + */ + + /* We must do a lock to ensure that the structure is initialized */ + (void) pthread_mutex_lock(&share->mutex); +#ifdef NOT_YET + pthread_mutex_unlock(&LOCK_open); +#endif + if (share->error) + { + /* Table definition contained an error */ + open_table_error(share, share->error, share->open_errno, share->errarg); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(0); + } + if (share->is_view && !(db_flags & OPEN_VIEW)) + { + open_table_error(share, 1, ENOENT, 0); + (void) pthread_mutex_unlock(&share->mutex); + DBUG_RETURN(0); + } + + if (!share->ref_count++ && share->prev) + { + /* + Share was not used before and it was in the old_unused_share list + Unlink share from this list + */ + DBUG_PRINT("info", ("Unlinking from not used list")); + pthread_mutex_lock(&LOCK_table_share); + *share->prev= share->next; + share->next->prev= share->prev; + share->next= 0; + share->prev= 0; + pthread_mutex_unlock(&LOCK_table_share); + } + (void) pthread_mutex_unlock(&share->mutex); + + /* Free cache if too big */ + while (table_def_cache.records > table_def_size && + oldest_unused_share->next) + { + pthread_mutex_lock(&oldest_unused_share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) oldest_unused_share)); + } + + DBUG_PRINT("exit", ("share: 0x%lx ref_count: %u", + (ulong) share, share->ref_count)); + DBUG_RETURN(share); +} + + +/* + Get a table share. If it doesn't exist, try creating it from the engine + + For arguments and return values, see get_table_share() +*/ + +static TABLE_SHARE +*get_table_share_with_create(THD *thd, TABLE_LIST *table_list, + char *key, uint key_length, + uint db_flags, int *error) +{ + TABLE_SHARE *share; + int tmp; + DBUG_ENTER("get_table_share_with_create"); + + if ((share= get_table_share(thd, table_list, key, key_length, + db_flags, error)) || + thd->net.last_errno != ER_NO_SUCH_TABLE) + DBUG_RETURN(share); + + /* Table didn't exist.
Check if some engine can provide it */ + if ((tmp= ha_create_table_from_engine(thd, table_list->db, + table_list->table_name)) < 0) + { + /* + No such table in any engine. + Hide "Table doesn't exist" errors if the table belongs to a view + */ + if (table_list->belong_to_view) + { + TABLE_LIST *view= table_list->belong_to_view; + thd->clear_error(); + my_error(ER_VIEW_INVALID, MYF(0), + view->view_db.str, view->view_name.str); + } + DBUG_RETURN(0); + } + if (tmp) + { + /* Give right error message */ + thd->clear_error(); + DBUG_PRINT("error", ("Discovery of %s/%s failed", table_list->db, + table_list->table_name)); + my_printf_error(ER_UNKNOWN_ERROR, + "Failed to open '%-.64s', error while " + "unpacking from engine", + MYF(0), table_list->table_name); + DBUG_RETURN(0); + } + /* Table existed in engine. Let's open it */ + mysql_reset_errors(thd, 1); // Clear warnings + thd->clear_error(); // Clear error message + DBUG_RETURN(get_table_share(thd, table_list, key, key_length, + db_flags, error)); +} + + +/* + Mark that we are not using table share anymore. + + SYNOPSIS + release_table_share() + share Table share + release_type How the release should be done: + RELEASE_NORMAL + - Release without checking + RELEASE_WAIT_FOR_DROP + - Don't return until we get a signal that the + table is deleted or the thread is killed. + + IMPLEMENTATION + If ref_count goes to zero and (we have done a refresh or if we have + already too many open table shares) then delete the definition. + + If type == RELEASE_WAIT_FOR_DROP then don't return until we get a signal + that the table is deleted or the thread is killed. +*/ + +void release_table_share(TABLE_SHARE *share, enum release_type type) +{ + bool to_be_deleted= 0; + DBUG_ENTER("release_table_share"); + DBUG_PRINT("enter", + ("share: 0x%lx table: %s.%s ref_count: %u version: %lu", + (ulong) share, share->db.str, share->table_name.str, + share->ref_count, share->version)); + + safe_mutex_assert_owner(&LOCK_open); + + pthread_mutex_lock(&share->mutex); + if (!--share->ref_count) + { + if (share->version != refresh_version) + to_be_deleted=1; + else + { + /* Link share last in used_table_share list */ + DBUG_PRINT("info",("moving share to unused list")); + + DBUG_ASSERT(share->next == 0); + pthread_mutex_lock(&LOCK_table_share); + share->prev= end_of_unused_share.prev; + *end_of_unused_share.prev= share; + end_of_unused_share.prev= &share->next; + share->next= &end_of_unused_share; + pthread_mutex_unlock(&LOCK_table_share); + + to_be_deleted= (table_def_cache.records > table_def_size); + } + } + + if (to_be_deleted) + { + DBUG_PRINT("info", ("Deleting share")); + hash_delete(&table_def_cache, (byte*) share); + DBUG_VOID_RETURN; + } + pthread_mutex_unlock(&share->mutex); + DBUG_VOID_RETURN; + + +#ifdef NOT_YET + if (to_be_deleted) + { + /* + We must try again with new locks as we must get LOCK_open + before share->mutex + */ + pthread_mutex_unlock(&share->mutex); + pthread_mutex_lock(&LOCK_open); + pthread_mutex_lock(&share->mutex); + if (!share->ref_count) + { // No one is using this now + TABLE_SHARE *name_lock; + if (share->replace_with_name_lock && (name_lock=get_name_lock(share))) + { + /* + This code is executed when someone does FLUSH TABLES while one has + locked tables.
+ */ + (void) hash_search(&def_cache,(byte*) key,key_length); + hash_replace(&def_cache, def_cache.current_record,(byte*) name_lock); + } + else + { + /* Remove table definition */ + hash_delete(&def_cache,(byte*) share); + } + pthread_mutex_unlock(&LOCK_open); + free_table_share(share); + } + else + { + pthread_mutex_unlock(&LOCK_open); + if (type == RELEASE_WAIT_FOR_DROP) + wait_for_table(share, "Waiting for close"); + else + pthread_mutex_unlock(&share->mutex); + } + } + else if (type == RELEASE_WAIT_FOR_DROP) + wait_for_table(share, "Waiting for close"); + else + pthread_mutex_unlock(&share->mutex); +#endif +} + + +/* + Check if a table definition exists in the cache + + SYNOPSIS + get_cached_table_share() + db Database name + table_name Table name + + RETURN + 0 Not cached + # TABLE_SHARE for table +*/ + +TABLE_SHARE *get_cached_table_share(const char *db, const char *table_name) +{ + char key[NAME_LEN*2+2]; + TABLE_LIST table_list; + uint key_length; + safe_mutex_assert_owner(&LOCK_open); + + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + key_length= create_table_def_key((THD*) 0, key, &table_list, 0); + return (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, key_length); +} + + +/* + Close file handle, but leave the table in the table cache + + SYNOPSIS + close_handle_and_leave_table_as_lock() + table Table handler + + NOTES + By leaving the table in the table cache, it disallows any other thread + from opening the table + + thd->killed will be set if we run out of memory +*/ + + +static void close_handle_and_leave_table_as_lock(TABLE *table) +{ + TABLE_SHARE *share, *old_share= table->s; + MEM_ROOT *mem_root= &table->mem_root; + DBUG_ENTER("close_handle_and_leave_table_as_lock"); + + /* + Make a local copy of the table share and free the current one.
+ This has to be done to ensure that the table share is removed from + the table definition cache as soon as the last instance is removed + */ + if ((share= (TABLE_SHARE*) alloc_root(mem_root, sizeof(*share)))) + { + bzero((char*) share, sizeof(*share)); + share->db.str= memdup_root(mem_root, old_share->db.str, + old_share->db.length+1); + share->db.length= old_share->db.length; + share->table_name.str= memdup_root(mem_root, + old_share->table_name.str, + old_share->table_name.length+1); + share->table_name.length= old_share->table_name.length; + share->table_cache_key.str= memdup_root(mem_root, + old_share->table_cache_key.str, + old_share->table_cache_key.length); + share->table_cache_key.length= old_share->table_cache_key.length; + share->tmp_table= INTERNAL_TMP_TABLE; // for intern_close_table() + } + + table->file->close(); + table->db_stat= 0; // Mark file closed + release_table_share(table->s, RELEASE_NORMAL); + table->s= share; + + DBUG_VOID_RETURN; +} + + + /* Create a list for all open tables matching SQL expression @@ -147,17 +703,14 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) TABLE *entry=(TABLE*) hash_element(&open_cache,idx); TABLE_SHARE *share= entry->s; - DBUG_ASSERT(share->table_name != 0); - if ((!share->table_name)) // To be removed - continue; // Shouldn't happen - if (db && my_strcasecmp(system_charset_info, db, share->db)) + if (db && my_strcasecmp(system_charset_info, db, share->db.str)) continue; - if (wild && wild_compare(share->table_name,wild,0)) + if (wild && wild_compare(share->table_name.str, wild, 0)) continue; /* Check if user has SELECT privilege for any column in the table */ - table_list.db= (char*) share->db; - table_list.table_name= (char*) share->table_name; + table_list.db= share->db.str; + table_list.table_name= share->table_name.str; table_list.grant.privilege=0; if (check_table_access(thd,SELECT_ACL | EXTRA_ACL,&table_list,1)) @@ -165,8 +718,8 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) /* need to check if we haven't already listed it */ for (table= open_list ; table ; table=table->next) { - if (!strcmp(table->table,share->table_name) && - !strcmp(table->db,entry->s->db)) + if (!strcmp(table->table, share->table_name.str) && + !strcmp(table->db, share->db.str)) { if (entry->in_use) table->in_use++; @@ -178,15 +731,15 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) if (table) continue; if (!(*start_list = (OPEN_TABLE_LIST *) - sql_alloc(sizeof(**start_list)+share->key_length))) + sql_alloc(sizeof(**start_list)+share->table_cache_key.length))) { open_list=0; // Out of memory break; } strmov((*start_list)->table= strmov(((*start_list)->db= (char*) ((*start_list)+1)), - entry->s->db)+1, - entry->s->table_name); + share->db.str)+1, + share->table_name.str); (*start_list)->in_use= entry->in_use ? 1 : 0; (*start_list)->locked= entry->locked_by_name ?
1 : 0; start_list= &(*start_list)->next; @@ -203,10 +756,13 @@ OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) void intern_close_table(TABLE *table) { // Free all structures + DBUG_ENTER("intern_close_table"); + free_io_cache(table); delete table->triggers; - if (table->file) - VOID(closefrm(table)); // close file + if (table->file) // Not true if name lock + VOID(closefrm(table, 1)); // close file + DBUG_VOID_RETURN; } /* @@ -223,7 +779,6 @@ void intern_close_table(TABLE *table) static void free_cache_entry(TABLE *table) { DBUG_ENTER("free_cache_entry"); - safe_mutex_assert_owner(&LOCK_open); intern_close_table(table); if (!table->in_use) @@ -264,15 +819,17 @@ void free_io_cache(TABLE *table) */ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, - TABLE_LIST *tables) + TABLE_LIST *tables, bool have_lock) { bool result=0; DBUG_ENTER("close_cached_tables"); DBUG_ASSERT(thd || (!if_wait_for_refresh && !tables)); - VOID(pthread_mutex_lock(&LOCK_open)); + if (!have_lock) + VOID(pthread_mutex_lock(&LOCK_open)); if (!tables) { + refresh_version++; // Force close of open tables while (unused_tables) { #ifdef EXTRA_DEBUG @@ -282,14 +839,20 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, VOID(hash_delete(&open_cache,(byte*) unused_tables)); #endif } - refresh_version++; // Force close of open tables + /* Free table shares */ + while (oldest_unused_share->next) + { + pthread_mutex_lock(&oldest_unused_share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) oldest_unused_share)); + } } else { bool found=0; for (TABLE_LIST *table= tables; table; table= table->next_local) { - if (remove_table_from_cache(thd, table->db, table->table_name, + if ((!table->table || !table->table->s->log_table) && + remove_table_from_cache(thd, table->db, table->table_name, RTFC_OWNED_BY_THD_FLAG)) found=1; } @@ -323,7 +886,8 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, for (uint idx=0 ; idx < open_cache.records ; idx++) { TABLE *table=(TABLE*) hash_element(&open_cache,idx); - if ((table->s->version) < refresh_version && table->db_stat) + if (!table->s->log_table && + ((table->s->version) < refresh_version && table->db_stat)) { found=1; pthread_cond_wait(&COND_refresh,&LOCK_open); @@ -343,7 +907,8 @@ bool close_cached_tables(THD *thd, bool if_wait_for_refresh, for (TABLE *table=thd->open_tables; table ; table= table->next) table->s->version= refresh_version; } - VOID(pthread_mutex_unlock(&LOCK_open)); + if (!have_lock) + VOID(pthread_mutex_unlock(&LOCK_open)); if (if_wait_for_refresh) { pthread_mutex_lock(&thd->mysys_var->mutex); @@ -493,6 +1058,19 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived) /* Fallthrough */ } + /* + For RBR: before calling close_thread_tables(), storage engines + should autocommit. Hence if there is a pending event, it belongs + to a non-transactional engine, which writes directly to the table, + and should therefore be flushed before unlocking and closing the + tables. The test above for locked tables will not be triggered + since RBR locks and unlocks tables on a per-event basis. + + TODO (WL#3023): Change the semantics so that RBR does not lock and + unlock tables on a per-event basis.
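The ordering this comment describes can be condensed into a short sketch; close_thread_tables_sketch() is an illustrative stand-in for the real function, while binlog_flush_pending_rows_event() and mysql_unlock_tables() are the calls the patch actually uses:

    /*
      Sketch only: pending row events written by non-transactional engines
      must reach the binary log before their tables are unlocked and closed.
    */
    void close_thread_tables_sketch(THD *thd)
    {
      thd->binlog_flush_pending_rows_event(true); /* 1. flush row events */
      if (thd->lock)
      {
        mysql_unlock_tables(thd, thd->lock);      /* 2. release the locks */
        thd->lock= 0;
      }
      /* 3. ... then close the TABLE instances as before ... */
    }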
+ */ + thd->binlog_flush_pending_rows_event(true); + if (thd->lock) { mysql_unlock_tables(thd, thd->lock); @@ -513,11 +1091,10 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived) /* VOID(pthread_sigmask(SIG_SETMASK,&thd->block_signals,NULL)); */ if (!lock_in_use) VOID(pthread_mutex_lock(&LOCK_open)); - safe_mutex_assert_owner(&LOCK_open); DBUG_PRINT("info", ("thd->open_tables: %p", thd->open_tables)); - found_old_table= 0; + found_old_table= 0; while (thd->open_tables) found_old_table|=close_thread_table(thd, &thd->open_tables); thd->some_tables_deleted=0; @@ -575,7 +1152,7 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) else { // Free memory and reset for next loop - table->file->reset(); + table->file->ha_reset(); } table->in_use=0; if (unused_tables) @@ -591,22 +1168,10 @@ bool close_thread_table(THD *thd, TABLE **table_ptr) DBUG_RETURN(found_old_table); } - /* Close and delete temporary tables */ - -void close_temporary(TABLE *table,bool delete_table) -{ - DBUG_ENTER("close_temporary"); - char path[FN_REFLEN]; - db_type table_type=table->s->db_type; - strmov(path,table->s->path); - free_io_cache(table); - closefrm(table); - my_free((char*) table,MYF(0)); - if (delete_table) - rm_temporary_table(table_type, path); - DBUG_VOID_RETURN; -} +/* + Close all temporary tables created by 'CREATE TEMPORARY TABLE' for thread +*/ void close_temporary_tables(THD *thd) { @@ -622,12 +1187,14 @@ void close_temporary_tables(THD *thd) query_buf_size= 50; // Enough for DROP ... TABLE IF EXISTS for (table=thd->temporary_tables ; table ; table=table->next) + { /* We are going to add 4 ` around the db/table names, so 1 does not look - enough; indeed it is enough, because table->key_length is greater (by 8, - because of server_id and thread_id) than db||table. + enough; indeed it is enough, because table->table_cache_key.length is + greater (by 8, because of server_id and thread_id) than db||table. */ - query_buf_size+= table->s->key_length+1; + query_buf_size+= table->s->table_cache_key.length+1; + } if ((query = alloc_root(thd->mem_root, query_buf_size))) // Better add "if exists", in case a RESET MASTER has been done @@ -638,23 +1205,24 @@ void close_temporary_tables(THD *thd) if (query) // we might be out of memory, but this is not fatal { // skip temporary tables not created directly by the user - if (table->s->table_name[0] != '#') + if (table->s->table_name.str[0] != '#') found_user_tables = 1; - end = strxmov(end,"`",table->s->db,"`.`", - table->s->table_name,"`,", NullS); + end= strxmov(end, "`",table->s->db.str, "`.`", + table->s->table_name.str, "`,", NullS); } next=table->next; - close_temporary(table, 1); + close_temporary(table, 1, 1); } - if (query && found_user_tables && mysql_bin_log.is_open()) + if (query && found_user_tables && mysql_bin_log.is_open() && + !thd->current_stmt_binlog_row_based) // CREATE TEMP TABLE not binlogged if row-based { /* The -1 is to remove last ',' */ thd->clear_error(); Query_log_event qinfo(thd, query, (ulong)(end-query)-1, 0, FALSE); /* Imagine the thread had created a temp table, then was doing a SELECT, and - the SELECT was killed. Then it's not clever to mark the statement above as - "killed", because it's not really a statement updating data, and there + the SELECT was killed. Then it's not clever to mark the statement above + as "killed", because it's not really a statement updating data, and there are 99.99% chances it will succeed on slave. 
If a real update (one updating a persistent table) was killed on the master, then this real update will be logged with error_code=killed, @@ -840,43 +1408,93 @@ void update_non_unique_table_error(TABLE_LIST *update, } -TABLE **find_temporary_table(THD *thd, const char *db, const char *table_name) +TABLE *find_temporary_table(THD *thd, const char *db, const char *table_name) { char key[MAX_DBKEY_LENGTH]; - uint key_length= (uint) (strmov(strmov(key,db)+1,table_name)-key)+1; - TABLE *table,**prev; + uint key_length; + TABLE_LIST table_list; + TABLE *table; - int4store(key+key_length,thd->server_id); - key_length += 4; - int4store(key+key_length,thd->variables.pseudo_thread_id); - key_length += 4; + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + return find_temporary_table(thd, &table_list); +} - prev= &thd->temporary_tables; - for (table=thd->temporary_tables ; table ; table=table->next) + +TABLE *find_temporary_table(THD *thd, TABLE_LIST *table_list) +{ + char key[MAX_DBKEY_LENGTH]; + uint key_length; + TABLE *table; + + key_length= create_table_def_key(thd, key, table_list, 1); + for (table=thd->temporary_tables ; table ; table= table->next) { - if (table->s->key_length == key_length && - !memcmp(table->s->table_cache_key,key,key_length)) - return prev; - prev= &table->next; + if (table->s->table_cache_key.length == key_length && + !memcmp(table->s->table_cache_key.str, key, key_length)) + return table; } return 0; // Not a temporary table } -bool close_temporary_table(THD *thd, const char *db, const char *table_name) + +/* + Close temporary table and unlink from thd->temporary tables +*/ + +bool close_temporary_table(THD *thd, TABLE_LIST *table_list) { - TABLE *table,**prev; + TABLE *table; - if (!(prev=find_temporary_table(thd,db,table_name))) + if (!(table= find_temporary_table(thd, table_list))) return 1; - table= *prev; - *prev= table->next; - close_temporary(table, 1); - if (thd->slave_thread) - --slave_open_temp_tables; + close_temporary_table(thd, table, 1, 1); return 0; } /* + Close temporary table and unlink from thd->temporary tables +*/ + +void close_temporary_table(THD *thd, TABLE *table, + bool free_share, bool delete_table) +{ + TABLE **prev= table->open_prev; + if ((*table->open_prev= table->next)) + table->next->open_prev= prev; + if (thd->slave_thread) + slave_open_temp_tables--; + close_temporary(table, free_share, delete_table); +} + + +/* + Close and delete a temporary table + + NOTE + This doesn't unlink the table from thd->temporary + If this is needed, use close_temporary_table() +*/ + +void close_temporary(TABLE *table, bool free_share, bool delete_table) +{ + handlerton *table_type= table->s->db_type; + DBUG_ENTER("close_temporary"); + + free_io_cache(table); + closefrm(table, 0); + if (delete_table) + rm_temporary_table(table_type, table->s->path.str); + if (free_share) + { + free_table_share(table->s); + my_free((char*) table,MYF(0)); + } + DBUG_VOID_RETURN; +} + + +/* Used by ALTER TABLE when the table is a temporary one. It changes something only if the ALTER contained a RENAME clause (otherwise, table_name is the old name).
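Both create_table_def_key() above and rename_temporary_table() below depend on the same key layout, so a hedged sketch may be useful; it assumes db, table_name and thd are in scope, exactly as in the patch's own create_table_def_key():

    /*
      A temporary-table cache key is laid out as
        db_name\0table_name\0<4-byte server_id><4-byte pseudo_thread_id>
      where the 8 trailing bytes (TMP_TABLE_KEY_EXTRA) keep temporary
      tables from different master threads distinct on a slave.
    */
    char key[MAX_DBKEY_LENGTH];
    uint length= (uint) (strmov(strmov(key, db)+1, table_name)-key)+1;
    int4store(key + length, thd->server_id);                      /* 4 bytes */
    int4store(key + length + 4, thd->variables.pseudo_thread_id); /* 4 bytes */
    length+= TMP_TABLE_KEY_EXTRA;                                 /* 8 bytes */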
@@ -889,21 +1507,28 @@ bool rename_temporary_table(THD* thd, TABLE *table, const char *db, { char *key; TABLE_SHARE *share= table->s; - - if (!(key=(char*) alloc_root(&table->mem_root, - (uint) strlen(db)+ - (uint) strlen(table_name)+6+4))) - return 1; /* purecov: inspected */ - share->key_length= (uint) - (strmov((char*) (share->table_name= strmov(share->table_cache_key= key, - db)+1), - table_name) - share->table_cache_key)+1; - share->db= share->table_cache_key; - int4store(key+share->key_length, thd->server_id); - share->key_length+= 4; - int4store(key+share->key_length, thd->variables.pseudo_thread_id); - share->key_length+= 4; - return 0; + TABLE_LIST table_list; + uint db_length, table_length; + DBUG_ENTER("rename_temporary_table"); + + if (!(key=(char*) alloc_root(&share->mem_root, + (uint) (db_length= strlen(db))+ + (uint) (table_length= strlen(table_name))+6+4))) + DBUG_RETURN(1); /* purecov: inspected */ + + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + share->db.str= share->table_cache_key.str= key; + share->db.length= db_length; + share->table_cache_key.length= create_table_def_key(thd, key, + &table_list, 1); + /* + Here we use the fact that table_name is stored as the second component + in the 'key' (after db_name), where components are separated with \0 + */ + share->table_name.str= key+db_length+1; + share->table_name.length= table_length; + DBUG_RETURN(0); } @@ -933,16 +1558,16 @@ static void relink_unused(TABLE *table) TABLE *unlink_open_table(THD *thd, TABLE *list, TABLE *find) { char key[MAX_DBKEY_LENGTH]; - uint key_length= find->s->key_length; + uint key_length= find->s->table_cache_key.length; TABLE *start=list,**prev,*next; prev= &start; - memcpy(key, find->s->table_cache_key, key_length); + memcpy(key, find->s->table_cache_key.str, key_length); for (; list ; list=next) { next=list->next; - if (list->s->key_length == key_length && - !memcmp(list->s->table_cache_key, key, key_length)) + if (list->s->table_cache_key.length == key_length && + !memcmp(list->s->table_cache_key.str, key, key_length)) { if (thd->locked_tables) mysql_lock_remove(thd, thd->locked_tables,list); @@ -962,24 +1587,39 @@ TABLE *unlink_open_table(THD *thd, TABLE *list, TABLE *find) /* - When we call the following function we must have a lock on - LOCK_open ; This lock will be unlocked on return. 
+ Wait for condition but allow the user to send a kill to mysqld + + SYNOPSIS + wait_for_condition() + thd Thread handler + mutex mutex that is currently held and that is associated with the condition + Will be unlocked on return + cond Condition to wait for */ -void wait_for_refresh(THD *thd) +void wait_for_condition(THD *thd, pthread_mutex_t *mutex, pthread_cond_t *cond) { - safe_mutex_assert_owner(&LOCK_open); - /* Wait until the current table is up to date */ const char *proc_info; - thd->mysys_var->current_mutex= &LOCK_open; - thd->mysys_var->current_cond= &COND_refresh; + thd->mysys_var->current_mutex= mutex; + thd->mysys_var->current_cond= cond; proc_info=thd->proc_info; thd->proc_info="Waiting for table"; if (!thd->killed) - (void) pthread_cond_wait(&COND_refresh,&LOCK_open); + (void) pthread_cond_wait(cond, mutex); - pthread_mutex_unlock(&LOCK_open); // Must be unlocked first + /* + We must unlock mutex first to avoid deadlock because conditions are + sent to this thread by doing locks in the following order: + lock(mysys_var->mutex) + lock(mysys_var->current_mutex) + + One side effect of this is that one can only use wait_for_condition() with + condition variables that are guaranteed not to disappear (be freed) even if + this mutex is unlocked + */ + + pthread_mutex_unlock(mutex); pthread_mutex_lock(&thd->mysys_var->mutex); thd->mysys_var->current_mutex= 0; thd->mysys_var->current_cond= 0; @@ -1025,10 +1665,9 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) orig_table= *table; key_length=(uint) (strmov(strmov(key,db)+1,table_name)-key)+1; - if (open_unireg_entry(thd, table, db, table_name, table_name, 0, - thd->mem_root) || - !(table->s->table_cache_key= memdup_root(&table->mem_root, (char*) key, - key_length))) + if (open_unireg_entry(thd, table, table_list, table_name, + table->s->table_cache_key.str, + table->s->table_cache_key.length, thd->mem_root)) { intern_close_table(table); /* @@ -1042,8 +1681,6 @@ bool reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) } share= table->s; - share->db= share->table_cache_key; - share->key_length=key_length; share->version=0; share->flush_version=0; table->in_use = thd; @@ -1107,17 +1744,17 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, if (thd->killed) DBUG_RETURN(0); - key_length= (uint) (strmov(strmov(key, table_list->db)+1, - table_list->table_name)-key)+1; - int4store(key + key_length, thd->server_id); - int4store(key + key_length + 4, thd->variables.pseudo_thread_id); + + key_length= (create_table_def_key(thd, key, table_list, 1) - + TMP_TABLE_KEY_EXTRA); if (!table_list->skip_temporary) { for (table= thd->temporary_tables; table ; table=table->next) { - if (table->s->key_length == key_length + TMP_TABLE_KEY_EXTRA && - !memcmp(table->s->table_cache_key, key, + if (table->s->table_cache_key.length == key_length + + TMP_TABLE_KEY_EXTRA && + !memcmp(table->s->table_cache_key.str, key, key_length + TMP_TABLE_KEY_EXTRA)) { if (table->query_id == thd->query_id || @@ -1144,8 +1781,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, (int) TL_WRITE_ALLOW_WRITE); for (table=thd->open_tables; table ; table=table->next) { - if (table->s->key_length == key_length && - !memcmp(table->s->table_cache_key, key, key_length)) + if (table->s->table_cache_key.length == key_length && + !memcmp(table->s->table_cache_key.str, key, key_length)) { if (check_if_used && table->query_id && table->query_id != thd->query_id) @@ -1157,7 +1794,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT
*mem_root, is not already open by some calling statement. */ my_error(ER_CANT_UPDATE_USED_TABLE_IN_SF_OR_TRG, MYF(0), - table->s->table_name); + table->s->table_name.str); DBUG_RETURN(0); } if (!my_strcasecmp(system_charset_info, table->alias, alias) && @@ -1211,10 +1848,9 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, */ { char path[FN_REFLEN]; - db_type not_used; - strxnmov(path, FN_REFLEN, mysql_data_home, "/", table_list->db, "/", - table_list->table_name, reg_ext, NullS); - (void) unpack_filename(path, path); + enum legacy_db_type not_used; + build_table_filename(path, sizeof(path) - 1, + table_list->db, table_list->table_name, reg_ext); if (mysql_frm_type(thd, path, &not_used) == FRMTYPE_VIEW) { /* @@ -1224,9 +1860,8 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, TABLE tab; table= &tab; VOID(pthread_mutex_lock(&LOCK_open)); - if (!open_unireg_entry(thd, table, table_list->db, - table_list->table_name, - alias, table_list, mem_root)) + if (!open_unireg_entry(thd, table, table_list, alias, + key, key_length, mem_root)) { DBUG_ASSERT(table_list->view != 0); VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1244,7 +1879,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, if (!thd->open_tables) thd->version=refresh_version; else if ((thd->version != refresh_version) && - ! (flags & MYSQL_LOCK_IGNORE_FLUSH)) + ! (flags & MYSQL_LOCK_IGNORE_FLUSH) && !table->s->log_table) { /* Someone did a refresh while thread was opening tables */ if (refresh) @@ -1265,7 +1900,11 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, { if (table->s->version != refresh_version) { + /* + Don't close tables if we are working with a log table or were + asked not to close the table explicitly + */ - if (flags & MYSQL_LOCK_IGNORE_FLUSH) + if (flags & MYSQL_LOCK_IGNORE_FLUSH || table->s->log_table) { /* Force close at once after usage */ thd->version= table->s->version; @@ -1278,7 +1917,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, */ close_old_data_files(thd,thd->open_tables,0,0); if (table->in_use != thd) - wait_for_refresh(thd); + wait_for_condition(thd, &LOCK_open, &COND_refresh); else { VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1313,15 +1952,11 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(NULL); } - if (open_unireg_entry(thd, table, table_list->db, table_list->table_name, - alias, table_list, mem_root) || - (!table_list->view && - !(table->s->table_cache_key= memdup_root(&table->mem_root, - (char*) key, - key_length)))) + + if (open_unireg_entry(thd, table, table_list, alias, key, key_length, + mem_root)) { - table->next=table->prev=table; - free_cache_entry(table); + my_free((gptr)table, MYF(0)); VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(NULL); } @@ -1331,11 +1966,6 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(0); // VIEW } - share= table->s; - share->db= share->table_cache_key; - share->key_length= key_length; - share->version= refresh_version; - share->flush_version= flush_version; DBUG_PRINT("info", ("inserting table %p into the cache", table)); VOID(my_hash_insert(&open_cache,(byte*) table)); } @@ -1351,9 +1981,11 @@ TABLE *open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, table->reginfo.lock_type=TL_READ; /* Assume read */ reset: + DBUG_ASSERT(table->s->ref_count > 0 || table->s->tmp_table !=
NO_TMP_TABLE); + if (thd->lex->need_correct_ident()) table->alias_name_used= my_strcasecmp(table_alias_charset, - table->s->table_name, alias); + table->s->table_name.str, alias); /* Fix alias if table name changes */ if (strcmp(table->alias, alias)) { @@ -1388,63 +2020,64 @@ TABLE *find_locked_table(THD *thd, const char *db,const char *table_name) for (TABLE *table=thd->open_tables; table ; table=table->next) { - if (table->s->key_length == key_length && - !memcmp(table->s->table_cache_key,key,key_length)) + if (table->s->table_cache_key.length == key_length && + !memcmp(table->s->table_cache_key.str, key, key_length)) return table; } return(0); } -/**************************************************************************** - Reopen a table because the definition has changed. The data file for the - table is already closed. +/* + Reopen a table because the definition has changed. SYNOPSIS reopen_table() - table Table to be opened - locked 1 if we have already a lock on LOCK_open + table Table object NOTES - table->query_id will be 0 if table was reopened + The data file for the table is already closed and the share is released + The table has a 'dummy' share that mainly contains database and table name. - RETURN - 0 ok - 1 error ('table' is unchanged if table couldn't be reopened) -****************************************************************************/ + RETURN + 0 ok + 1 error. The old table object is not changed. +*/ -bool reopen_table(TABLE *table,bool locked) +static bool reopen_table(TABLE *table) { TABLE tmp; - char *db= table->s->table_cache_key; - const char *table_name= table->s->table_name; bool error= 1; Field **field; uint key,part; + TABLE_LIST table_list; + THD *thd= table->in_use; DBUG_ENTER("reopen_table"); + DBUG_ASSERT(table->s->ref_count == 0); + DBUG_ASSERT(!table->sort.io_cache); + #ifdef EXTRA_DEBUG if (table->db_stat) sql_print_error("Table %s had a open data handler in reopen_table", table->alias); #endif - if (!locked) - VOID(pthread_mutex_lock(&LOCK_open)); - safe_mutex_assert_owner(&LOCK_open); - if (open_unireg_entry(table->in_use, &tmp, db, table_name, - table->alias, 0, table->in_use->mem_root)) - goto end; - free_io_cache(table); + table_list.db= table->s->db.str; + table_list.table_name= table->s->table_name.str; + table_list.table= table; + table_list.belong_to_view= 0; + table_list.next_local= 0; - if (!(tmp.s->table_cache_key= memdup_root(&tmp.mem_root,db, - table->s->key_length))) - { - delete tmp.triggers; - closefrm(&tmp); // End of memory + if (wait_for_locked_table_names(thd, &table_list)) + DBUG_RETURN(1); // Thread was killed + + if (open_unireg_entry(thd, &tmp, &table_list, + table->alias, + table->s->table_cache_key.str, + table->s->table_cache_key.length, + thd->mem_root)) goto end; - } - tmp.s->db= tmp.s->table_cache_key; /* This list copies variables set by open_table */ tmp.tablenr= table->tablenr; @@ -1456,12 +2089,11 @@ bool reopen_table(TABLE *table,bool locked) tmp.keys_in_use_for_query= tmp.s->keys_in_use; tmp.used_keys= tmp.s->keys_for_keyread; + tmp.s->table_map_id= table->s->table_map_id; + /* Get state */ - tmp.s->key_length= table->s->key_length; - tmp.in_use= table->in_use; + tmp.in_use= thd; tmp.reginfo.lock_type=table->reginfo.lock_type; - tmp.s->version= refresh_version; - tmp.s->tmp_table= table->s->tmp_table; tmp.grant= table->grant; /* Replace table in open list */ @@ -1470,11 +2102,10 @@ bool reopen_table(TABLE *table,bool locked) delete table->triggers; if (table->file) - VOID(closefrm(table)); // close file, free
everything + VOID(closefrm(table, 1)); // close file, free everything *table= tmp; - table->s= &table->share_not_to_be_used; - table->file->change_table_ptr(table); + table->file->change_table_ptr(table, table->s); DBUG_ASSERT(table->alias != 0); for (field=table->field ; *field ; field++) @@ -1494,8 +2125,6 @@ bool reopen_table(TABLE *table,bool locked) error=0; end: - if (!locked) - VOID(pthread_mutex_unlock(&LOCK_open)); DBUG_RETURN(error); } @@ -1504,22 +2133,23 @@ bool reopen_table(TABLE *table,bool locked) Used with ALTER TABLE: Close all instanses of table when LOCK TABLES is in used; Close first all instances of table and then reopen them - */ +*/ bool close_data_tables(THD *thd,const char *db, const char *table_name) { TABLE *table; + DBUG_ENTER("close_data_tables"); + for (table=thd->open_tables; table ; table=table->next) { - if (!strcmp(table->s->table_name, table_name) && - !strcmp(table->s->db, db)) + if (!strcmp(table->s->table_name.str, table_name) && + !strcmp(table->s->db.str, db)) { mysql_lock_remove(thd, thd->locked_tables,table); - table->file->close(); - table->db_stat=0; + close_handle_and_leave_table_as_lock(table); } } - return 0; // For the future + DBUG_RETURN(0); // For the future } @@ -1530,20 +2160,21 @@ bool close_data_tables(THD *thd,const char *db, const char *table_name) bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { + TABLE *table,*next,**prev; + TABLE **tables,**tables_ptr; // For locks + bool error=0, not_used; DBUG_ENTER("reopen_tables"); - safe_mutex_assert_owner(&LOCK_open); if (!thd->open_tables) DBUG_RETURN(0); - TABLE *table,*next,**prev; - TABLE **tables,**tables_ptr; // For locks - bool error=0, not_used; + safe_mutex_assert_owner(&LOCK_open); if (get_locks) { /* The ptr is checked later */ uint opens=0; - for (table=thd->open_tables; table ; table=table->next) opens++; + for (table= thd->open_tables; table ; table=table->next) + opens++; tables= (TABLE**) my_alloca(sizeof(TABLE*)*opens); } else @@ -1555,7 +2186,7 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) { uint db_stat=table->db_stat; next=table->next; - if (!tables || (!db_stat && reopen_table(table,1))) + if (!tables || (!db_stat && reopen_table(table))) { my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias); VOID(hash_delete(&open_cache,(byte*) table)); @@ -1596,6 +2227,7 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) DBUG_RETURN(error); } + /* Close handlers for tables in list, but leave the TABLE structure intact so that we can re-open these quickly @@ -1605,25 +2237,23 @@ bool reopen_tables(THD *thd,bool get_locks,bool in_refresh) void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, bool send_refresh) { + bool found= send_refresh; DBUG_ENTER("close_old_data_files"); - bool found=send_refresh; + for (; table ; table=table->next) { if (table->s->version != refresh_version) { found=1; - if (!abort_locks) // If not from flush tables - table->s->version= refresh_version; // Let other threads use table if (table->db_stat) { if (abort_locks) { - mysql_lock_abort(thd,table); // Close waiting threads + mysql_lock_abort(thd,table, TRUE); // Close waiting threads mysql_lock_remove(thd, thd->locked_tables,table); table->locked_by_flush=1; // Will be reopened with locks } - table->file->close(); - table->db_stat=0; + close_handle_and_leave_table_as_lock(table); } } } @@ -1637,14 +2267,21 @@ void close_old_data_files(THD *thd, TABLE *table, bool abort_locks, Wait until all threads has closed the tables in the list We have also to wait 
if there is a thread that has a lock on this table even if the table is closed + NOTE: log tables are handled differently by the logging routines. + E.g. general_log is always opened and locked by the logger + and the table handler used by the logger will be skipped by + this check. */ bool table_is_used(TABLE *table, bool wait_for_name_lock) { + DBUG_ENTER("table_is_used"); do { - char *key= table->s->table_cache_key; - uint key_length= table->s->key_length; + char *key= table->s->table_cache_key.str; + uint key_length= table->s->table_cache_key.length; + + DBUG_PRINT("loop", ("table_name: %s", table->alias)); HASH_SEARCH_STATE state; for (TABLE *search= (TABLE*) hash_first(&open_cache, (byte*) key, key_length, &state); @@ -1652,13 +2289,32 @@ bool table_is_used(TABLE *table, bool wait_for_name_lock) search= (TABLE*) hash_next(&open_cache, (byte*) key, key_length, &state)) { - if (search->locked_by_flush || - search->locked_by_name && wait_for_name_lock || - search->db_stat && search->s->version < refresh_version) - return 1; // Table is used + DBUG_PRINT("info", ("share: 0x%lx locked_by_logger: %d " + "locked_by_flush: %d locked_by_name: %d " + "db_stat: %u version: %u", + (ulong) search->s, search->locked_by_logger, + search->locked_by_flush, search->locked_by_name, + search->db_stat, + search->s->version)); + if (search->in_use == table->in_use) + continue; // Name locked by this thread + /* + We can't use the table under any of the following conditions: + - There is a name lock on it (Table is to be deleted or altered) + - If we are in flush table and we didn't execute the flush + - If the table engine is open and it's an old version + (We must wait until all engines are shut down to use the table) + However we do not wait if we encounter a table locked by the logger. + Log tables are managed separately by logging routines. + */ + if (!search->locked_by_logger && + (search->locked_by_name && wait_for_name_lock || + search->locked_by_flush || + (search->db_stat && search->s->version < refresh_version))) + return 1; } } while ((table=table->next)); - return 0; + DBUG_RETURN(0); } @@ -1705,8 +2361,8 @@ bool drop_locked_tables(THD *thd,const char *db, const char *table_name) for (table= thd->open_tables; table ; table=next) { next=table->next; - if (!strcmp(table->s->table_name, table_name) && - !strcmp(table->s->db, db)) + if (!strcmp(table->s->table_name.str, table_name) && + !strcmp(table->s->db.str, db)) { mysql_lock_remove(thd, thd->locked_tables,table); VOID(hash_delete(&open_cache,(byte*) table)); @@ -1741,10 +2397,10 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name) TABLE *table; for (table= thd->open_tables; table ; table= table->next) { - if (!strcmp(table->s->table_name,table_name) && - !strcmp(table->s->db, db)) + if (!strcmp(table->s->table_name.str, table_name) && + !strcmp(table->s->db.str, db)) { - mysql_lock_abort(thd,table); + mysql_lock_abort(thd,table, TRUE); break; } } @@ -1752,141 +2408,214 @@ /* + Function to assign a new table map id to a table share. + + PARAMETERS + + share - Pointer to table share structure + + DESCRIPTION + + We are intentionally not checking that share->mutex is locked + since this function should only be called when opening a table + share and before it is entered into the table_def_cache (meaning + that it cannot be fetched by another thread, even accidentally).
+ + PRE-CONDITION(S) + + share is non-NULL + The LOCK_open mutex is locked + + POST-CONDITION(S) + + share->table_map_id is given a value that with a high certainty is + not used by any other table (the only case where a table id can be + reused is on wrap-around, which means more than 4 billion table + shares open at the same time). + + share->table_map_id is not ULONG_MAX. + */ +void assign_new_table_id(TABLE_SHARE *share) +{ + static ulong last_table_id= ULONG_MAX; + + DBUG_ENTER("assign_new_table_id"); + + /* Preconditions */ + DBUG_ASSERT(share != NULL); + safe_mutex_assert_owner(&LOCK_open); + + ulong tid= ++last_table_id; /* get next id */ + /* + There is one reserved number that cannot be used. Remember to + change this when 6-byte global table id's are introduced. + */ + if (unlikely(tid == ULONG_MAX)) + tid= ++last_table_id; + share->table_map_id= tid; + DBUG_PRINT("info", ("table_id=%lu", tid)); + + /* Post conditions */ + DBUG_ASSERT(share->table_map_id != ULONG_MAX); + + DBUG_VOID_RETURN; +} + +/* Load a table definition from file and open unireg table SYNOPSIS open_unireg_entry() thd Thread handle entry Store open table definition here - db Database name - name Table name + table_list TABLE_LIST with db, table_name & belong_to_view alias Alias name - table_desc TABLE_LIST descriptor (used with views) + cache_key Key for share_cache + cache_key_length length of cache_key mem_root temporary mem_root for parsing NOTES Extra argument for open is taken from thd->open_options + One must have a lock on LOCK_open when calling this function RETURN 0 ok # Error */ -static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, - const char *name, const char *alias, - TABLE_LIST *table_desc, MEM_ROOT *mem_root) + +static int open_unireg_entry(THD *thd, TABLE *entry, TABLE_LIST *table_list, + const char *alias, + char *cache_key, uint cache_key_length, + MEM_ROOT *mem_root) { - char path[FN_REFLEN]; int error; + TABLE_SHARE *share; uint discover_retry_count= 0; DBUG_ENTER("open_unireg_entry"); - strxmov(path, mysql_data_home, "/", db, "/", name, NullS); - while ((error= openfrm(thd, path, alias, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | - HA_GET_INDEX | HA_TRY_READ_ONLY | - NO_ERR_ON_NEW_FRM), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - thd->open_options, entry)) && - (error != 5 || - (fn_format(path, path, 0, reg_ext, MY_UNPACK_FILENAME), - open_new_frm(thd, path, alias, db, name, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | - HA_GET_INDEX | HA_TRY_READ_ONLY), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - thd->open_options, entry, table_desc, mem_root)))) + safe_mutex_assert_owner(&LOCK_open); +retry: + if (!(share= get_table_share_with_create(thd, table_list, cache_key, + cache_key_length, + OPEN_VIEW, &error))) + DBUG_RETURN(1); + + if (share->is_view) { - if (!entry->s || !entry->s->crashed) + /* Open view */ + error= (int) open_new_frm(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX | HA_TRY_READ_ONLY), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + thd->open_options, entry, table_list, + mem_root); + if (error) + goto err; + /* TODO: Don't free this */ + release_table_share(share, RELEASE_NORMAL); + DBUG_RETURN(0); + } + + while ((error= open_table_from_share(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | + HA_OPEN_RNDFILE | + HA_GET_INDEX | + HA_TRY_READ_ONLY), + (READ_KEYINFO | COMPUTE_TYPES | + EXTRA_RECORD), + thd->open_options, entry, FALSE))) + { + if (error == 7) // Table def changed { + share->version= 0; // Mark share as 
old + if (discover_retry_count++) // Retry once + goto err; + + /* - Frm file could not be found on disk - Since it does not exist, no one can be using it - LOCK_open has been locked to protect from someone else - trying to discover the table at the same time. + TODO: + Here we should wait until all threads have released the table. + For now we do one retry. This may cause a deadlock if there + are other threads waiting for other tables used by this thread. + + The proper fix, if the second retry failed, would be to: + - Mark that table def changed + - Return from open table + - Close all tables used by this thread + - Start waiting until the share is released + - Retry by opening all tables again */ - if (discover_retry_count++ != 0) + if (ha_create_table_from_engine(thd, table_list->db, + table_list->table_name)) goto err; - if (ha_create_table_from_engine(thd, db, name) > 0) - { - /* Give right error message */ - thd->clear_error(); - DBUG_PRINT("error", ("Discovery of %s/%s failed", db, name)); - my_printf_error(ER_UNKNOWN_ERROR, - "Failed to open '%-.64s', error while " - "unpacking from engine", - MYF(0), name); - + /* + TO BE FIXED + To avoid deadlock, only wait for release if no one else is + using the share. + */ + if (share->ref_count != 1) goto err; - } - - mysql_reset_errors(thd, 1); // Clear warnings - thd->clear_error(); // Clear error message - continue; - } - - // Code below is for repairing a crashed file - TABLE_LIST table_list; - bzero((char*) &table_list, sizeof(table_list)); // just for safe - table_list.db=(char*) db; - table_list.table_name=(char*) name; - - safe_mutex_assert_owner(&LOCK_open); - - if ((error=lock_table_name(thd,&table_list))) - { - if (error < 0) - { - goto err; - } - if (wait_for_locked_table_names(thd,&table_list)) + /* Free share and wait until it's released by all threads */ + release_table_share(share, RELEASE_WAIT_FOR_DROP); + if (!thd->killed) { - unlock_table_name(thd,&table_list); - goto err; + mysql_reset_errors(thd, 1); // Clear warnings + thd->clear_error(); // Clear error message + goto retry; } + DBUG_RETURN(1); } - pthread_mutex_unlock(&LOCK_open); - thd->clear_error(); // Clear error message - error= 0; - if (openfrm(thd, path, alias, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | HA_GET_INDEX | - HA_TRY_READ_ONLY), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - ha_open_options | HA_OPEN_FOR_REPAIR, - entry) || ! entry->file || - (entry->file->is_crashed() && entry->file->check_and_repair(thd))) - { - /* Give right error message */ - thd->clear_error(); - my_error(ER_NOT_KEYFILE, MYF(0), name, my_errno); - sql_print_error("Couldn't repair table: %s.%s",db,name); - if (entry->file) - closefrm(entry); - error=1; - } - else - thd->clear_error(); // Clear error message - pthread_mutex_lock(&LOCK_open); - unlock_table_name(thd,&table_list); - - if (error) + if (!entry->s || !entry->s->crashed) goto err; - break; - } - if (error == 5) - DBUG_RETURN(0); // we have just opened VIEW - - /* - We can't mark all tables in 'mysql' database as system since we don't - allow to lock such tables for writing with any other tables (even with - other system tables) and some privilege tables need this.
- */ - if (!my_strcasecmp(system_charset_info, db, "mysql") && - !my_strcasecmp(system_charset_info, name, "proc")) - entry->s->system_table= 1; + // Code below is for repairing a crashed file + if ((error= lock_table_name(thd, table_list))) + { + if (error < 0) + goto err; + if (wait_for_locked_table_names(thd, table_list)) + { + unlock_table_name(thd, table_list); + goto err; + } + } + pthread_mutex_unlock(&LOCK_open); + thd->clear_error(); // Clear error message + error= 0; + if (open_table_from_share(thd, share, alias, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX | + HA_TRY_READ_ONLY), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + ha_open_options | HA_OPEN_FOR_REPAIR, + entry, FALSE) || ! entry->file || + (entry->file->is_crashed() && entry->file->check_and_repair(thd))) + { + /* Give right error message */ + thd->clear_error(); + my_error(ER_NOT_KEYFILE, MYF(0), share->table_name.str, my_errno); + sql_print_error("Couldn't repair table: %s.%s", share->db.str, + share->table_name.str); + if (entry->file) + closefrm(entry, 0); + error=1; + } + else + thd->clear_error(); // Clear error message + pthread_mutex_lock(&LOCK_open); + unlock_table_name(thd, table_list); + + if (error) + goto err; + break; + } - if (Table_triggers_list::check_n_load(thd, db, name, entry, 0)) + if (Table_triggers_list::check_n_load(thd, share->db.str, + share->table_name.str, entry, 0)) + { + closefrm(entry, 0); goto err; + } /* If we are here, there was no fatal error (but error may be still @@ -1898,13 +2627,14 @@ static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, if (mysql_bin_log.is_open()) { char *query, *end; - uint query_buf_size= 20 + 2*NAME_LEN + 1; - if ((query= (char*)my_malloc(query_buf_size,MYF(MY_WME)))) + uint query_buf_size= 20 + share->db.length + share->table_name.length +1; + if ((query= (char*) my_malloc(query_buf_size,MYF(MY_WME)))) { + /* this DELETE FROM is needed even with row-based binlogging */ end = strxmov(strmov(query, "DELETE FROM `"), - db,"`.`",name,"`", NullS); - Query_log_event qinfo(thd, query, (ulong)(end-query), 0, FALSE); - mysql_bin_log.write(&qinfo); + share->db.str,"`.`",share->table_name.str,"`", NullS); + thd->binlog_query(THD::STMT_QUERY_TYPE, + query, (ulong)(end-query), FALSE, FALSE); my_free(query, MYF(0)); } else @@ -1914,25 +2644,19 @@ static int open_unireg_entry(THD *thd, TABLE *entry, const char *db, DBA on top of warning the client (which will automatically be done because of MYF(MY_WME) in my_malloc() above). 
*/ - sql_print_error("When opening HEAP table, could not allocate \ -memory to write 'DELETE FROM `%s`.`%s`' to the binary log",db,name); + sql_print_error("When opening HEAP table, could not allocate memory " + "to write 'DELETE FROM `%s`.`%s`' to the binary log", + table_list->db, table_list->table_name); delete entry->triggers; - if (entry->file) - closefrm(entry); + closefrm(entry, 0); goto err; } } } DBUG_RETURN(0); + err: - /* Hide "Table doesn't exist" errors if table belong to view */ - if (thd->net.last_errno == ER_NO_SUCH_TABLE && - table_desc && table_desc->belong_to_view) - { - TABLE_LIST *view= table_desc->belong_to_view; - thd->clear_error(); - my_error(ER_VIEW_INVALID, MYF(0), view->view_db.str, view->view_name.str); - } + release_table_share(share, RELEASE_NORMAL); DBUG_RETURN(1); } @@ -2597,8 +3321,22 @@ void close_tables_for_reopen(THD *thd, TABLE_LIST **tables) /* Open a single table without table caching and don't set it in open_list - Used by alter_table to open a temporary table and when creating - a temporary table with CREATE TEMPORARY ... + + SYNOPSIS + open_temporary_table() + thd Thread object + path Path (without .frm) + db database + table_name Table name + link_in_list 1 if table should be linked into thd->temporary_tables + + NOTES: + Used by alter_table to open a temporary table and when creating + a temporary table with CREATE TEMPORARY ... + + RETURN + 0 Error + # TABLE object */ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, @@ -2606,51 +3344,53 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, { TABLE *tmp_table; TABLE_SHARE *share; + char cache_key[MAX_DBKEY_LENGTH], *saved_cache_key, *tmp_path; + uint key_length; + TABLE_LIST table_list; DBUG_ENTER("open_temporary_table"); - /* - The extra size in my_malloc() is for table_cache_key - 4 bytes for master thread id if we are in the slave - 1 byte to terminate db - 1 byte to terminate table_name - total of 6 extra bytes in my_malloc in addition to table/db stuff - */ - if (!(tmp_table=(TABLE*) my_malloc(sizeof(*tmp_table)+(uint) strlen(db)+ - (uint) strlen(table_name)+6+4, - MYF(MY_WME)))) + table_list.db= (char*) db; + table_list.table_name= (char*) table_name; + /* Create the cache_key for temporary tables */ + key_length= create_table_def_key(thd, cache_key, &table_list, 1); + + if (!(tmp_table= (TABLE*) my_malloc(sizeof(*tmp_table) + sizeof(*share) + + strlen(path)+1 + key_length, + MYF(MY_WME)))) DBUG_RETURN(0); /* purecov: inspected */ - if (openfrm(thd, path, table_name, - (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | HA_GET_INDEX), - READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, - ha_open_options, - tmp_table)) + share= (TABLE_SHARE*) (tmp_table+1); + tmp_path= (char*) (share+1); + saved_cache_key= strmov(tmp_path, path)+1; + memcpy(saved_cache_key, cache_key, key_length); + + init_tmp_table_share(share, saved_cache_key, key_length, + strend(saved_cache_key)+1, tmp_path); + + if (open_table_def(thd, share, 0) || + open_table_from_share(thd, share, table_name, + (uint) (HA_OPEN_KEYFILE | HA_OPEN_RNDFILE | + HA_GET_INDEX), + READ_KEYINFO | COMPUTE_TYPES | EXTRA_RECORD, + ha_open_options, + tmp_table, FALSE)) { + /* No need to lock share->mutex as this is not needed for tmp tables */ + free_table_share(share); my_free((char*) tmp_table,MYF(0)); DBUG_RETURN(0); } - share= tmp_table->s; - tmp_table->reginfo.lock_type=TL_WRITE; // Simulate locked + tmp_table->reginfo.lock_type= TL_WRITE; // Simulate locked share->tmp_table=
(tmp_table->file->has_transactions() ? TRANSACTIONAL_TMP_TABLE : TMP_TABLE); - share->table_cache_key= (char*) (tmp_table+1); - share->db= share->table_cache_key; - share->key_length= (uint) (strmov(((char*) (share->table_name= - strmov(share->table_cache_key, - db)+1)), - table_name) - - share->table_cache_key) +1; - int4store(share->table_cache_key + share->key_length, thd->server_id); - share->key_length+= 4; - int4store(share->table_cache_key + share->key_length, - thd->variables.pseudo_thread_id); - share->key_length+= 4; if (link_in_list) { - tmp_table->next=thd->temporary_tables; - thd->temporary_tables=tmp_table; + tmp_table->open_prev= &thd->temporary_tables; + if ((tmp_table->next= thd->temporary_tables)) + thd->temporary_tables->open_prev= &tmp_table->next; + thd->temporary_tables= tmp_table; if (thd->slave_thread) slave_open_temp_tables++; } @@ -2658,21 +3398,22 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, } -bool rm_temporary_table(enum db_type base, char *path) +bool rm_temporary_table(handlerton *base, char *path) { bool error=0; + handler *file; + char *ext; DBUG_ENTER("rm_temporary_table"); - fn_format(path, path,"",reg_ext,4); - unpack_filename(path,path); + strmov(ext= strend(path), reg_ext); if (my_delete(path,MYF(0))) error=1; /* purecov: inspected */ - *fn_ext(path)='\0'; // remove extension - handler *file= get_new_handler((TABLE*) 0, current_thd->mem_root, base); + *ext= 0; // remove extension + file= get_new_handler((TABLE_SHARE*) 0, current_thd->mem_root, base); if (file && file->delete_table(path)) { error=1; - sql_print_warning("Could not remove tmp table: '%s', error: %d", + sql_print_warning("Could not remove temporary table: '%s', error: %d", path, my_errno); } delete file; @@ -2699,15 +3440,20 @@ static void update_field_dependencies(THD *thd, Field *field, TABLE *table) { if (thd->set_query_id) { + table->file->ha_set_bit_in_rw_set(field->fieldnr, + (bool)(thd->set_query_id-1)); if (field->query_id != thd->query_id) { + if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; field->query_id= thd->query_id; table->used_fields++; table->used_keys.intersect(field->part_of_key); } else thd->dupp_field= field; - } + } else if (table->get_fields_in_item_tree) + field->flags|= GET_FIXED_FIELDS_FLAG; } @@ -2924,8 +3670,18 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, table->field[cached_field_index]->field_name, name)) field_ptr= table->field + cached_field_index; else if (table->s->name_hash.records) + { field_ptr= (Field**) hash_search(&table->s->name_hash, (byte*) name, length); + if (field_ptr) + { + /* + field_ptr points to field in TABLE_SHARE. Convert it to the matching + field in table + */ + field_ptr= (table->field + (field_ptr - table->s->field)); + } + } else { if (!(field_ptr= table->field)) @@ -2944,8 +3700,9 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, { if (!allow_rowid || my_strcasecmp(system_charset_info, name, "_rowid") || - !(field=table->rowid_field)) + table->s->rowid_field_offset == 0) DBUG_RETURN((Field*) 0); + field= table->field[table->s->rowid_field_offset-1]; } update_field_dependencies(thd, field, table); @@ -3094,18 +3851,88 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, register_tree_change, actual_table); } + if (fld) + { #ifndef NO_EMBEDDED_ACCESS_CHECKS - /* Check if there are sufficient access rights to the found field. 
*/ - if (fld && check_privileges && - check_column_grant_in_table_ref(thd, *actual_table, name, length)) - fld= WRONG_GRANT; + /* Check if there are sufficient access rights to the found field. */ + if (check_privileges && + check_column_grant_in_table_ref(thd, *actual_table, name, length)) + fld= WRONG_GRANT; + else #endif - + if (thd->set_query_id) + { + /* + * get rw_set correct for this field so that the handler + * knows that this field is involved in the query and gets + * retrieved/updated + */ + Field *field_to_set= NULL; + if (fld == view_ref_found) + { + Item *it= (*ref)->real_item(); + if (it->type() == Item::FIELD_ITEM) + field_to_set= ((Item_field*)it)->field; + } + else + field_to_set= fld; + if (field_to_set) + field_to_set->table->file-> + ha_set_bit_in_rw_set(field_to_set->fieldnr, + (bool)(thd->set_query_id-1)); + } + } DBUG_RETURN(fld); } /* + Find field in table, no side effects, only purpose is to check for field + in table object and get reference to the field if found. + + SYNOPSIS + find_field_in_table_sef() + + table table where to find + name Name of field searched for + + RETURN + 0 field is not found + # pointer to field +*/ + +Field *find_field_in_table_sef(TABLE *table, const char *name) +{ + Field **field_ptr; + if (table->s->name_hash.records) + { + field_ptr= (Field**)hash_search(&table->s->name_hash,(byte*) name, + strlen(name)); + if (field_ptr) + { + /* + field_ptr points to field in TABLE_SHARE. Convert it to the matching + field in table + */ + field_ptr= (table->field + (field_ptr - table->s->field)); + } + } + else + { + if (!(field_ptr= table->field)) + return (Field *)0; + for (; *field_ptr; ++field_ptr) + if (!my_strcasecmp(system_charset_info, (*field_ptr)->field_name, name)) + break; + } + if (field_ptr) + return *field_ptr; + else + return (Field *)0; +} + + +/* Find field in table list. SYNOPSIS @@ -3754,15 +4581,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, if (field_1) { + TABLE *table_1= nj_col_1->table_ref->table; /* Mark field_1 used for table cache. */ field_1->query_id= thd->query_id; - nj_col_1->table_ref->table->used_keys.intersect(field_1->part_of_key); + table_1->file->ha_set_bit_in_read_set(field_1->fieldnr); + table_1->used_keys.intersect(field_1->part_of_key); } if (field_2) { + TABLE *table_2= nj_col_2->table_ref->table; /* Mark field_2 used for table cache. 
*/ field_2->query_id= thd->query_id; - nj_col_2->table_ref->table->used_keys.intersect(field_2->part_of_key); + table_2->file->ha_set_bit_in_read_set(field_2->fieldnr); + table_2->used_keys.intersect(field_2->part_of_key); } if (using_fields != NULL) @@ -4229,16 +5060,17 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields, ****************************************************************************/ bool setup_fields(THD *thd, Item **ref_pointer_array, - List<Item> &fields, bool set_query_id, + List<Item> &fields, ulong set_query_id, List<Item> *sum_func_list, bool allow_sum_func) { reg2 Item *item; - bool save_set_query_id= thd->set_query_id; + ulong save_set_query_id= thd->set_query_id; nesting_map save_allow_sum_func= thd->lex->allow_sum_func; List_iterator<Item> it(fields); DBUG_ENTER("setup_fields"); thd->set_query_id=set_query_id; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); if (allow_sum_func) thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level; thd->where= THD::DEFAULT_WHERE; @@ -4265,6 +5097,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, { thd->lex->allow_sum_func= save_allow_sum_func; thd->set_query_id= save_set_query_id; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); DBUG_RETURN(TRUE); /* purecov: inspected */ } if (ref) @@ -4276,6 +5109,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, } thd->lex->allow_sum_func= save_allow_sum_func; thd->set_query_id= save_set_query_id; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); DBUG_RETURN(test(thd->net.report_error)); } @@ -4456,7 +5290,7 @@ bool get_key_map_from_key_list(key_map *map, TABLE *table, 0) { my_error(ER_KEY_COLUMN_DOES_NOT_EXITS, MYF(0), name->c_ptr(), - table->s->table_name); + table->s->table_name.str); map->set_all(); return 1; } @@ -4619,6 +5453,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (field->query_id == thd->query_id) thd->dupp_field= field; field->query_id= thd->query_id; + field->table->file->ha_set_bit_in_read_set(field->fieldnr); if (table) table->used_keys.intersect(field->part_of_key); @@ -4658,7 +5493,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, For NATURAL joins, used_tables is updated in the IF above. */ if (table) + { table->used_fields= table->s->fields; + table->file->ha_set_all_bits_in_read_set(); + } } if (found) DBUG_RETURN(FALSE); @@ -4718,6 +5556,7 @@ int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves, arena= 0; // For easier test thd->set_query_id=1; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); select_lex->cond_count= 0; for (table= tables; table; table= table->next_local) @@ -4955,28 +5794,28 @@ static void mysql_rm_tmp_tables(void) for (i=0; i<=mysql_tmpdir_list.max; i++) { tmpdir=mysql_tmpdir_list.list[i]; - /* See if the directory exists */ + /* See if the directory exists */ if (!(dirp = my_dir(tmpdir,MYF(MY_WME | MY_DONT_SORT)))) continue; /* Remove all SQLxxx tables from directory */ - for (idx=0 ; idx < (uint) dirp->number_off_files ; idx++) - { - file=dirp->dir_entry+idx; + for (idx=0 ; idx < (uint) dirp->number_off_files ; idx++) + { + file=dirp->dir_entry+idx; - /* skiping . and .. */ - if (file->name[0] == '.' && (!file->name[1] || - (file->name[1] == '.' && !file->name[2]))) - continue; + /* skipping . and .. */ + if (file->name[0] == '.' && (!file->name[1] || + (file->name[1] == '.'
&& !file->name[2]))) + continue; - if (!bcmp(file->name,tmp_file_prefix,tmp_file_prefix_length)) - { + if (!bcmp(file->name,tmp_file_prefix,tmp_file_prefix_length)) + { sprintf(filePath,"%s%s",tmpdir,file->name); VOID(my_delete(filePath,MYF(MY_WME))); + } } - } - my_dirend(dirp); + my_dirend(dirp); } DBUG_VOID_RETURN; } @@ -5005,7 +5844,7 @@ void remove_db_from_cache(const char *db) { for (uint idx=0 ; idx < open_cache.records ; idx++) { TABLE *table=(TABLE*) hash_element(&open_cache,idx); - if (!strcmp(table->s->db, db)) + if (!strcmp(table->s->db.str, db)) { table->s->version= 0L; /* Free when thread is ready */ if (!table->in_use) @@ -5018,7 +5857,11 @@ void remove_db_from_cache(const char *db) /* -** free all unused tables + free all unused tables + + NOTE + This is called by 'handle_manager' when one wants to periodically flush + all unused tables. */ void flush_tables() @@ -5051,7 +5894,8 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, char key[MAX_DBKEY_LENGTH]; uint key_length; TABLE *table; - bool result=0, signalled= 0; + TABLE_SHARE *share; + bool result= 0, signalled= 0; DBUG_ENTER("remove_table_from_cache"); key_length=(uint) (strmov(strmov(key,db)+1,table_name)-key)+1; @@ -5067,6 +5911,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, &state)) { THD *in_use; + table->s->version=0L; /* Free when thread is ready */ if (!(in_use=table->in_use)) { @@ -5075,6 +5920,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, } else if (in_use != thd) { + DBUG_PRINT("info", ("Table was in use by other thread")); in_use->some_tables_deleted=1; if (table->db_stat) result=1; @@ -5106,10 +5952,30 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, } } else + { + DBUG_PRINT("info", ("Table was in use by current thread.
db_stat: %u", + table->db_stat)); result= result || (flags & RTFC_OWNED_BY_THD_FLAG); + } } while (unused_tables && !unused_tables->s->version) VOID(hash_delete(&open_cache,(byte*) unused_tables)); + + DBUG_PRINT("info", ("Removing table from table_def_cache")); + /* Remove table from table definition cache if it's not in use */ + if ((share= (TABLE_SHARE*) hash_search(&table_def_cache,(byte*) key, + key_length))) + { + DBUG_PRINT("info", ("share version: %lu ref_count: %u", + share->version, share->ref_count)); + share->version= 0; // Mark for delete + if (share->ref_count == 0) + { + pthread_mutex_lock(&share->mutex); + VOID(hash_delete(&table_def_cache, (byte*) share)); + } + } + if (result && (flags & RTFC_WAIT_OTHER_THREAD_FLAG)) { if (!(flags & RTFC_CHECK_KILLED_FLAG) || !thd->killed) @@ -5142,6 +6008,7 @@ bool remove_table_from_cache(THD *thd, const char *db, const char *table_name, DBUG_RETURN(result); } + int setup_ftfuncs(SELECT_LEX *select_lex) { List_iterator<Item_func_match> li(*(select_lex->ftfunc_list)), @@ -5186,7 +6053,7 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) SYNOPSIS open_new_frm() THD thread handler - path path to .frm + path path to .frm file (without extension) alias alias for table db database table_name name of table @@ -5200,18 +6067,20 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) */ static bool -open_new_frm(THD *thd, const char *path, const char *alias, - const char *db, const char *table_name, +open_new_frm(THD *thd, TABLE_SHARE *share, const char *alias, uint db_stat, uint prgflag, uint ha_open_flags, TABLE *outparam, TABLE_LIST *table_desc, MEM_ROOT *mem_root) { LEX_STRING pathstr; File_parser *parser; + char path[FN_REFLEN]; DBUG_ENTER("open_new_frm"); - pathstr.str= (char*) path; - pathstr.length= strlen(path); + /* Create path with extension */ + pathstr.length= (uint) (strxmov(path, share->normalized_path.str, reg_ext, + NullS)- path); + pathstr.str= path; if ((parser= sql_parse_prepare(&pathstr, mem_root, 1))) { @@ -5219,7 +6088,8 @@ open_new_frm(THD *thd, const char *path, const char *alias, { if (table_desc == 0 || table_desc->required_type == FRMTYPE_TABLE) { - my_error(ER_WRONG_OBJECT, MYF(0), db, table_name, "BASE TABLE"); + my_error(ER_WRONG_OBJECT, MYF(0), share->db.str, share->table_name.str, + "BASE TABLE"); goto err; } if (mysql_make_view(thd, parser, table_desc)) @@ -5228,7 +6098,7 @@ open_new_frm(THD *thd, const char *path, const char *alias, else { /* only VIEWs are supported now */ - my_error(ER_FRM_UNKNOWN_TYPE, MYF(0), path, parser->type()->str); + my_error(ER_FRM_UNKNOWN_TYPE, MYF(0), share->path, parser->type()->str); goto err; } DBUG_RETURN(0); @@ -5244,3 +6114,155 @@ bool is_equal(const LEX_STRING *a, const LEX_STRING *b) { return a->length == b->length && !strncmp(a->str, b->str, a->length); } + + +/* + SYNOPSIS + abort_and_upgrade_lock() + lpt Parameter passing struct + All parameters passed through the ALTER_PARTITION_PARAM_TYPE object + RETURN VALUES + TRUE Failure + FALSE Success + DESCRIPTION + Remember old lock level (for possible downgrade later on), abort all + waiting threads and ensure that all keeping locks currently are + completed such that we own the lock exclusively and no other interaction + is ongoing. 
+ + thd Thread object + table Table object + db Database name + table_name Table name + old_lock_level Old lock level +*/ + +bool abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + uint flags= RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG; + int error= FALSE; + DBUG_ENTER("abort_and_upgrade_lock"); + + lpt->old_lock_type= lpt->table->reginfo.lock_type; + VOID(pthread_mutex_lock(&LOCK_open)); + mysql_lock_abort(lpt->thd, lpt->table, TRUE); + VOID(remove_table_from_cache(lpt->thd, lpt->db, lpt->table_name, flags)); + if (lpt->thd->killed) + { + lpt->thd->no_warnings_for_error= 0; + error= TRUE; + } + VOID(pthread_mutex_unlock(&LOCK_open)); + DBUG_RETURN(error); +} + + +/* + SYNOPSIS + close_open_tables_and_downgrade() + RESULT VALUES + NONE + DESCRIPTION + We need to ensure that any thread that has managed to open the table + but not yet encountered our lock on the table is also thrown out to + ensure that no threads see our frm changes prematurely, before the + final version. The intermediate versions are only meant for use after a + crash and later REPAIR TABLE. + We also downgrade locks after the upgrade to WRITE_ONLY +*/ + +void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + VOID(pthread_mutex_lock(&LOCK_open)); + remove_table_from_cache(lpt->thd, lpt->db, lpt->table_name, + RTFC_WAIT_OTHER_THREAD_FLAG); + VOID(pthread_mutex_unlock(&LOCK_open)); + mysql_lock_downgrade_write(lpt->thd, lpt->table, lpt->old_lock_type); +} + + +/* + SYNOPSIS + mysql_wait_completed_table() + lpt Parameter passing struct + my_table My table object + All parameters passed through the ALTER_PARTITION_PARAM object + RETURN VALUES + NONE + DESCRIPTION + We have changed the frm file and now we want to wait for all users of + the old frm to complete before proceeding to ensure that no one + remains that uses the old frm definition. + Start by ensuring that all users of the table will be removed from cache + once they are done. Then abort all that have stumbled on locks and + haven't been started yet. + + thd Thread object + table Table object + db Database name + table_name Table name +*/ + +void mysql_wait_completed_table(ALTER_PARTITION_PARAM_TYPE *lpt, TABLE *my_table) +{ + char key[MAX_DBKEY_LENGTH]; + uint key_length; + TABLE *table; + DBUG_ENTER("mysql_wait_completed_table"); + + key_length=(uint) (strmov(strmov(key,lpt->db)+1,lpt->table_name)-key)+1; + VOID(pthread_mutex_lock(&LOCK_open)); + HASH_SEARCH_STATE state; + for (table= (TABLE*) hash_first(&open_cache,(byte*) key,key_length, + &state) ; + table; + table= (TABLE*) hash_next(&open_cache,(byte*) key,key_length, + &state)) + { + THD *in_use= table->in_use; + table->s->version= 0L; + if (!in_use) + { + relink_unused(table); + } + else + { + /* Kill delayed insert threads */ + if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) && ! in_use->killed) + { + in_use->killed= THD::KILL_CONNECTION; + pthread_mutex_lock(&in_use->mysys_var->mutex); + if (in_use->mysys_var->current_cond) + { + pthread_mutex_lock(in_use->mysys_var->current_mutex); + pthread_cond_broadcast(in_use->mysys_var->current_cond); + pthread_mutex_unlock(in_use->mysys_var->current_mutex); + } + pthread_mutex_unlock(&in_use->mysys_var->mutex); + } + /* + Now we must abort all table locks used by this thread + as the thread may be waiting to get a lock for another table + */ + for (TABLE *thd_table= in_use->open_tables; + thd_table ; + thd_table= thd_table->next) + { + if (thd_table->db_stat) // If table is open + mysql_lock_abort_for_thread(lpt->thd, thd_table); + } + } + } + /* + We start by removing all unused objects from the cache and marking + those in use for removal after completion. Now we also need to abort + all that are locked and are not progressing due to being locked + by our lock. We don't upgrade our lock here. + */ + mysql_lock_abort(lpt->thd, my_table, FALSE); + VOID(pthread_mutex_unlock(&LOCK_open)); + DBUG_VOID_RETURN; +} + diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc new file mode 100644 index 00000000000..0939ad66cd0 --- /dev/null +++ b/sql/sql_binlog.cc @@ -0,0 +1,135 @@ +/* Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include "base64.h" + +/* + Execute a BINLOG statement + + TODO: This currently assumes a MySQL 5.x binlog. + When we have binlogs with a different format, to execute the + BINLOG command properly the server will need to know which format + the BINLOG command's event is in. mysqlbinlog should then send + the Format_description_log_event of the binlog it reads and the + server thread should cache this format into + rli->description_event_for_exec. +*/ + +void mysql_client_binlog_statement(THD* thd) +{ + DBUG_PRINT("info",("binlog base64: '%*s'", + (thd->lex->comment.length < 2048 ?
+ thd->lex->comment.length : 2048), + thd->lex->comment.str)); + + /* + Temporarily turn off send_ok, since different events handle this + differently + */ + my_bool nsok= thd->net.no_send_ok; + thd->net.no_send_ok= TRUE; + + const my_size_t coded_len= thd->lex->comment.length + 1; + const my_size_t event_len= base64_needed_decoded_length(coded_len); + DBUG_ASSERT(coded_len > 0); + + /* + Allocation + */ + if (!thd->rli_fake) + thd->rli_fake= new RELAY_LOG_INFO; + + const Format_description_log_event *desc= + new Format_description_log_event(4); + + const char *error= 0; + char *buf= (char *) my_malloc(event_len, MYF(MY_WME)); + Log_event *ev = 0; + int res; + + /* + Out of memory check + */ + if (!(thd->rli_fake && desc && buf)) + { + my_error(ER_OUTOFMEMORY, MYF(0), 1); /* needed 1 byte */ + goto end; + } + + thd->rli_fake->sql_thd= thd; + thd->rli_fake->no_storage= TRUE; + + res= base64_decode(thd->lex->comment.str, coded_len, buf); + + DBUG_PRINT("info",("binlog base64 decoded_len=%d, event_len=%d\n", + res, uint4korr(buf + EVENT_LEN_OFFSET))); + /* + Note that 'res' is the correct event length, 'event_len' was + calculated based on the base64-string that possibly contained + extra spaces, so it can be longer than the real event. + */ + if (res < EVENT_LEN_OFFSET + || (uint) res != uint4korr(buf+EVENT_LEN_OFFSET)) + { + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + ev= Log_event::read_log_event(buf, res, &error, desc); + + DBUG_PRINT("info",("binlog base64 err=%s", error)); + if (!ev) + { + /* + This could actually be an out-of-memory, but it is more + likely caused by a bad statement + */ + my_error(ER_SYNTAX_ERROR, MYF(0)); + goto end; + } + + DBUG_PRINT("info",("ev->get_type_code()=%d", ev->get_type_code())); + DBUG_PRINT("info",("buf+EVENT_TYPE_OFFSET=%d", buf+EVENT_TYPE_OFFSET)); + + ev->thd= thd; + if (ev->exec_event(thd->rli_fake)) + { + my_error(ER_UNKNOWN_ERROR, MYF(0), "Error executing BINLOG statement"); + goto end; + } + + /* + Restore setting of no_send_ok + */ + thd->net.no_send_ok= nsok; + + DBUG_PRINT("info",("binlog base64 execution finished successfully")); + send_ok(thd); + +end: + /* + Restore setting of no_send_ok + */ + thd->net.no_send_ok= nsok; + + if (ev) + delete ev; + if (desc) + delete desc; + if (buf) + my_free(buf, MYF(0)); +} diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h index 0f5b6dcd35e..35c501ede56 100644 --- a/sql/sql_bitmap.h +++ b/sql/sql_bitmap.h @@ -25,7 +25,7 @@ template <uint default_width> class Bitmap { MY_BITMAP map; - uchar buffer[(default_width+7)/8]; + uint32 buffer[(default_width+31)/32]; public: Bitmap() { init(); } Bitmap(const Bitmap& from) { *this=from; } @@ -48,14 +48,14 @@ public: void intersect(ulonglong map2buff) { MY_BITMAP map2; - bitmap_init(&map2, (uchar *)&map2buff, sizeof(ulonglong)*8, 0); + bitmap_init(&map2, (uint32 *)&map2buff, sizeof(ulonglong)*8, 0); bitmap_intersect(&map, &map2); } /* Use highest bit for all bits above sizeof(ulonglong)*8.
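For example (an illustrative sketch; any width above 64 bits behaves the same, and it assumes that the plain intersect() above clears the bits beyond the 64-bit mask, which is what makes the bitmap_set_above() call below necessary):

      Bitmap<96> b;
      b.set_all();
      b.intersect_extended((ulonglong) 1 << 63);
      // bits 0..62 are cleared, bit 63 survives the intersect, and
      // bits 64..95 are set again because the mask's highest bit was set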
*/ void intersect_extended(ulonglong map2buff) { intersect(map2buff); - if (map.bitmap_size > sizeof(ulonglong)) + if (map.n_bits > sizeof(ulonglong) * 8) bitmap_set_above(&map, sizeof(ulonglong), test(map2buff & (LL(1) << (sizeof(ulonglong) * 8 - 1)))); } @@ -70,7 +70,7 @@ public: char *print(char *buf) const { char *s=buf; - const uchar *e=buffer, *b=e+sizeof(buffer)-1; + const uchar *e=(uchar *)buffer, *b=e+sizeof(buffer)-1; while (!*b && b>e) b--; if ((*s=_dig_vec_upper[*b >> 4]) != '0') diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index cf3ba9c8c40..5b060aa13c6 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -303,7 +303,7 @@ TODO list: #ifndef MASTER #include "../srclib/myisammrg/myrg_def.h" #else -#include "../myisammrg/myrg_def.h" +#include "../storage/myisammrg/myrg_def.h" #endif #ifdef EMBEDDED_LIBRARY @@ -316,13 +316,13 @@ TODO list: #define MUTEX_UNLOCK(M) {DBUG_PRINT("lock", ("mutex unlock 0x%lx",\ (ulong)(M))); pthread_mutex_unlock(M);} #define RW_WLOCK(M) {DBUG_PRINT("lock", ("rwlock wlock 0x%lx",(ulong)(M))); \ - if (!rw_wrlock(M)) DBUG_PRINT("lock", ("rwlock wlock ok")) \ + if (!rw_wrlock(M)) DBUG_PRINT("lock", ("rwlock wlock ok")); \ else DBUG_PRINT("lock", ("rwlock wlock FAILED %d", errno)); } #define RW_RLOCK(M) {DBUG_PRINT("lock", ("rwlock rlock 0x%lx", (ulong)(M))); \ - if (!rw_rdlock(M)) DBUG_PRINT("lock", ("rwlock rlock ok")) \ + if (!rw_rdlock(M)) DBUG_PRINT("lock", ("rwlock rlock ok")); \ else DBUG_PRINT("lock", ("rwlock wlock FAILED %d", errno)); } #define RW_UNLOCK(M) {DBUG_PRINT("lock", ("rwlock unlock 0x%lx",(ulong)(M))); \ - if (!rw_unlock(M)) DBUG_PRINT("lock", ("rwlock unlock ok")) \ + if (!rw_unlock(M)) DBUG_PRINT("lock", ("rwlock unlock ok")); \ else DBUG_PRINT("lock", ("rwlock unlock FAILED %d", errno)); } #define STRUCT_LOCK(M) {DBUG_PRINT("lock", ("%d struct lock...",__LINE__)); \ pthread_mutex_lock(M);DBUG_PRINT("lock", ("struct lock OK"));} @@ -850,7 +850,7 @@ sql mode: 0x%lx, sort len: %lu, conncat len: %lu", if (thd->db_length) { memcpy(thd->query+thd->query_length+1, thd->db, thd->db_length); - DBUG_PRINT("qcache", ("database : %s length %u", + DBUG_PRINT("qcache", ("database: %s length: %u", thd->db, thd->db_length)); } else @@ -986,9 +986,10 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) while (sql[i]=='(') i++; + /* Test if the query is a SELECT - (pre-space is removed in dispatch_command) + (pre-space is removed in dispatch_command). 
A first '/' looks like a comment before the command; it does not appear frequently in real life, consequently we can @@ -997,7 +998,7 @@ if ((my_toupper(system_charset_info, sql[i]) != 'S' || my_toupper(system_charset_info, sql[i + 1]) != 'E' || my_toupper(system_charset_info, sql[i + 2]) != 'L') && - sql[i] != '/') + sql[0] != '/') { DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached")); goto err; } @@ -1016,7 +1017,7 @@ Query_cache::send_result_to_client(THD *thd, char *sql, uint query_length) if (thd->db_length) { memcpy(sql+query_length+1, thd->db, thd->db_length); - DBUG_PRINT("qcache", ("database: '%s' length %u", + DBUG_PRINT("qcache", ("database: '%s' length: %u", thd->db, thd->db_length)); } else @@ -1113,9 +1114,9 @@ sql mode: 0x%lx, sort len: %lu, conncat len: %lu", */ for (tmptable= thd->temporary_tables; tmptable ; tmptable= tmptable->next) { - if (tmptable->s->key_length - TMP_TABLE_KEY_EXTRA == + if (tmptable->s->table_cache_key.length - TMP_TABLE_KEY_EXTRA == table->key_length() && - !memcmp(tmptable->s->table_cache_key, table->data(), + !memcmp(tmptable->s->table_cache_key.str, table->data(), table->key_length())) { DBUG_PRINT("qcache", @@ -1278,7 +1279,7 @@ void Query_cache::invalidate(CHANGED_TABLE_LIST *tables_used) for (; tables_used; tables_used= tables_used->next) { invalidate_table((byte*) tables_used->key, tables_used->key_length); - DBUG_PRINT("qcache", (" db %s, table %s", tables_used->key, + DBUG_PRINT("qcache", ("db: %s table: %s", tables_used->key, tables_used->key+ strlen(tables_used->key)+1)); } @@ -2145,7 +2146,8 @@ void Query_cache::invalidate_table(TABLE_LIST *table_list) void Query_cache::invalidate_table(TABLE *table) { - invalidate_table((byte*) table->s->table_cache_key, table->s->key_length); + invalidate_table((byte*) table->s->table_cache_key.str, + table->s->table_cache_key.length); } void Query_cache::invalidate_table(byte * key, uint32 key_length) @@ -2206,7 +2208,7 @@ Query_cache::register_tables_from_list(TABLE_LIST *tables_used, { char key[MAX_DBKEY_LENGTH]; uint key_length; - DBUG_PRINT("qcache", ("view %s, db %s", + DBUG_PRINT("qcache", ("view: %s db: %s", tables_used->view_name.str, tables_used->view_db.str)); key_length= (uint) (strmov(strmov(key, tables_used->view_db.str) + 1, @@ -2226,21 +2228,22 @@ Query_cache::register_tables_from_list(TABLE_LIST *tables_used, else { DBUG_PRINT("qcache", - ("table %s, db %s, openinfo at 0x%lx, keylen %u, key at 0x%lx", - tables_used->table->s->table_name, - tables_used->table->s->table_cache_key, + ("table: %s db: %s openinfo: 0x%lx keylen: %u key: 0x%lx", + tables_used->table->s->table_name.str, + tables_used->table->s->table_cache_key.str, (ulong) tables_used->table, - tables_used->table->s->key_length, - (ulong) tables_used->table->s->table_cache_key)); - if (!insert_table(tables_used->table->s->key_length, - tables_used->table->s->table_cache_key, block_table, + tables_used->table->s->table_cache_key.length, + (ulong) tables_used->table->s->table_cache_key.str)); + if (!insert_table(tables_used->table->s->table_cache_key.length, + tables_used->table->s->table_cache_key.str, + block_table, tables_used->db_length, tables_used->table->file->table_cache_type(), tables_used->callback_func, tables_used->engine_data)) DBUG_RETURN(0); - if (tables_used->table->s->db_type == DB_TYPE_MRG_MYISAM) + if (tables_used->table->s->db_type == &myisammrg_hton) { ha_myisammrg *handler = (ha_myisammrg *) tables_used->table->file; MYRG_INFO
*file = handler->myrg_info(); @@ -2833,16 +2836,16 @@ static TABLE_COUNTER_TYPE process_and_count_tables(TABLE_LIST *tables_used, table_count++; if (tables_used->view) { - DBUG_PRINT("qcache", ("view %s, db %s", + DBUG_PRINT("qcache", ("view: %s db: %s", tables_used->view_name.str, tables_used->view_db.str)); *tables_type|= HA_CACHE_TBL_NONTRANSACT; } else { - DBUG_PRINT("qcache", ("table %s, db %s, type %u", - tables_used->table->s->table_name, - tables_used->table->s->table_cache_key, + DBUG_PRINT("qcache", ("table: %s db: %s type: %u", + tables_used->table->s->table_name.str, + tables_used->table->s->db.str, tables_used->table->s->db_type)); if (tables_used->derived) { @@ -2860,15 +2863,15 @@ static TABLE_COUNTER_TYPE process_and_count_tables(TABLE_LIST *tables_used, (*tables_type & HA_CACHE_TBL_NOCACHE) || (tables_used->db_length == 5 && my_strnncoll(table_alias_charset, - (uchar*)tables_used->table->s->table_cache_key, 6, + (uchar*)tables_used->table->s->table_cache_key.str, 6, (uchar*)"mysql",6) == 0)) { DBUG_PRINT("qcache", - ("select not cacheable: temporary, system or \ - other non-cacheable table(s)")); + ("select not cacheable: temporary, system or " + "other non-cacheable table(s)")); DBUG_RETURN(0); } - if (tables_used->table->s->db_type == DB_TYPE_MRG_MYISAM) + if (tables_used->table->s->db_type == &myisammrg_hton) { ha_myisammrg *handler = (ha_myisammrg *)tables_used->table->file; MYRG_INFO *file = handler->myrg_info(); @@ -2947,11 +2950,13 @@ my_bool Query_cache::ask_handler_allowance(THD *thd, for (; tables_used; tables_used= tables_used->next_global) { TABLE *table; + handler *handler; if (!(table= tables_used->table)) continue; - handler *handler= table->file; - if (!handler->register_query_cache_table(thd, table->s->table_cache_key, - table->s->key_length, + handler= table->file; + if (!handler->register_query_cache_table(thd, + table->s->table_cache_key.str, + table->s->table_cache_key.length, &tables_used->callback_func, &tables_used->engine_data)) { diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 59391a333c3..d391584a3be 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -27,6 +27,8 @@ #endif #include "mysql_priv.h" +#include <my_bitmap.h> +#include "log_event.h" #include <m_ctype.h> #include <sys/stat.h> #include <thr_alarm.h> @@ -165,6 +167,25 @@ Open_tables_state::Open_tables_state(ulong version_arg) reset_open_tables_state(); } +my_bool thd_in_lock_tables(const THD *thd) +{ + return thd->in_lock_tables; +} + + +my_bool thd_tablespace_op(const THD *thd) +{ + return thd->tablespace_op; +} + + +const char *thd_proc_info(THD *thd, const char *info) +{ + const char *old_info= thd->proc_info; + thd->proc_info= info; + return old_info; +} + /* Pass nominal parameters to Statement constructor only to ensure that @@ -174,7 +195,7 @@ Open_tables_state::Open_tables_state(ulong version_arg) THD::THD() :Statement(CONVENTIONAL_EXECUTION, 0, ALLOC_ROOT_MIN_BLOCK_SIZE, 0), - Open_tables_state(refresh_version), + Open_tables_state(refresh_version), rli_fake(0), lock_id(&main_lock_id), user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0), rand_used(0), time_zone_used(0), @@ -227,6 +248,9 @@ THD::THD() ull=0; system_thread= cleanup_done= abort_on_warning= no_warnings_for_error= 0; peer_port= 0; // For SHOW PROCESSLIST +#ifdef HAVE_ROW_BASED_REPLICATION + transaction.m_pending_rows_event= 0; +#endif #ifdef __WIN__ real_id = 0; #endif @@ -288,7 +312,7 @@ void THD::init(void) variables.date_format); variables.datetime_format= date_time_format_copy((THD*) 0, 
variables.datetime_format); -#ifdef HAVE_NDBCLUSTER_DB +#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE variables.ndb_use_transactions= 1; #endif pthread_mutex_unlock(&LOCK_global_system_variables); @@ -305,6 +329,7 @@ void THD::init(void) bzero((char*) warn_count, sizeof(warn_count)); total_warn_count= 0; update_charset(); + reset_current_stmt_binlog_row_based(); bzero((char *) &status_var, sizeof(status_var)); } @@ -440,6 +465,11 @@ THD::~THD() #ifndef DBUG_OFF dbug_sentry= THD_SENTRY_GONE; #endif +#ifndef EMBEDDED_LIBRARY + if (rli_fake) + delete rli_fake; +#endif + DBUG_VOID_RETURN; } @@ -675,7 +705,8 @@ void THD::add_changed_table(TABLE *table) DBUG_ASSERT((options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && table->file->has_transactions()); - add_changed_table(table->s->table_cache_key, table->s->key_length); + add_changed_table(table->s->table_cache_key.str, + table->s->table_cache_key.length); DBUG_VOID_RETURN; } @@ -752,6 +783,16 @@ int THD::send_explain_fields(select_result *result) field_list.push_back(new Item_empty_string("select_type", 19, cs)); field_list.push_back(item= new Item_empty_string("table", NAME_LEN, cs)); item->maybe_null= 1; +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (lex->describe & DESCRIBE_PARTITIONS) + { + /* Maximum length of string that make_used_partitions_str() can produce */ + item= new Item_empty_string("partitions", MAX_PARTITIONS * (1 + FN_LEN), + cs); + field_list.push_back(item); + item->maybe_null= 1; + } +#endif field_list.push_back(item= new Item_empty_string("type", 10, cs)); item->maybe_null= 1; field_list.push_back(item=new Item_empty_string("possible_keys", @@ -919,7 +960,7 @@ bool select_send::send_data(List<Item> &items) return 0; } -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved @@ -956,7 +997,7 @@ bool select_send::send_data(List<Item> &items) bool select_send::send_eof() { -#ifdef HAVE_INNOBASE_DB +#ifdef WITH_INNOBASE_STORAGE_ENGINE /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ @@ -1071,7 +1112,8 @@ static File create_file(THD *thd, char *path, sql_exchange *exchange, if (!dirname_length(exchange->file_name)) { - strxnmov(path, FN_REFLEN, mysql_real_data_home, thd->db ? thd->db : "", NullS); + strxnmov(path, FN_REFLEN-1, mysql_real_data_home, thd->db ? thd->db : "", + NullS); (void) fn_format(path, exchange->file_name, path, "", option); } else @@ -1969,7 +2011,27 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup, backup->client_capabilities= client_capabilities; backup->savepoints= transaction.savepoints; - if (!lex->requires_prelocking() || is_update_query(lex->sql_command)) +#ifdef HAVE_ROW_BASED_REPLICATION + /* + For row-based replication and before executing a function/trigger, + the pending rows event has to be flushed. The function/trigger + might execute statements that require the pending event to be + flushed.
A simple example: + + CREATE FUNCTION foo() RETURNS INT + BEGIN + SAVEPOINT x; + RETURN 0; + END + + INSERT INTO t1 VALUES (1), (foo()), (2); + */ + if (current_stmt_binlog_row_based) + binlog_flush_pending_rows_event(false); +#endif /* HAVE_ROW_BASED_REPLICATION */ + + if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && + !current_stmt_binlog_row_based) options&= ~OPTION_BIN_LOG; /* Disable result sets */ client_capabilities &= ~CLIENT_MULTI_RESULTS; @@ -2109,3 +2171,447 @@ void xid_cache_delete(XID_STATE *xid_state) pthread_mutex_unlock(&LOCK_xid_cache); } +/* + Implementation of interface to write rows to the binary log through the + thread. The thread is responsible for writing the rows it has + inserted/updated/deleted. +*/ + +#ifndef MYSQL_CLIENT +#ifdef HAVE_ROW_BASED_REPLICATION + +/* + Template member function for ensuring that there is a rows log + event of the appropriate type before proceeding. + + PRE CONDITION: + - Events of type 'RowsEventT' have the type code 'type_code'. + + POST CONDITION: + If a non-NULL pointer is returned, the pending event for thread 'thd' will + be an event of type 'RowsEventT' (which has the type code 'type_code') and + will be either empty or have enough space to hold 'needed' bytes. In + addition, the columns bitmap will be correct for the row, meaning that + the pending event will be flushed if the columns in the event differ from + the columns supplied to the function. + + RETURNS + If no error, a non-NULL pending event (either one which already existed or + the newly created one). + If error, NULL. + */ + +template <class RowsEventT> Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, + MY_BITMAP const* cols, + my_size_t colcnt, + my_size_t needed, + bool is_transactional, + RowsEventT *hint __attribute__((unused))) +{ + /* Pre-conditions */ + DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); + + /* Fetch the type code for the RowsEventT template parameter */ + int const type_code= RowsEventT::TYPE_CODE; + + /* + There is no good place to set up the transactional data, so we + have to do it here. + */ + if (binlog_setup_trx_data()) + return NULL; + + Rows_log_event* pending= binlog_get_pending_rows_event(); + + if (unlikely(pending && !pending->is_valid())) + return NULL; + + /* + Check if the current event is non-NULL and a write-rows + event. Also check if the table provided is mapped: if it is not, + then we have switched to writing to a new table. + If there is no pending event, we need to create one. If there is a pending + event, but it's not about the same table id, or not of the same type + (between Write, Update and Delete), or not the same affected columns, or + going to be too big, flush this event to disk and create a new pending + event. + */ + if (!pending || + pending->server_id != serv_id || + pending->get_table_id() != table->s->table_map_id || + pending->get_type_code() != type_code || + pending->get_data_size() + needed > opt_binlog_rows_event_max_size || + pending->get_width() != colcnt || + !bitmap_cmp(pending->get_cols(), cols)) + { + /* Create a new RowsEventT... */ + Rows_log_event* const + ev= new RowsEventT(this, table, table->s->table_map_id, cols, + is_transactional); + if (unlikely(!ev)) + return NULL; + ev->server_id= serv_id; // I don't like this, it's too easy to forget. + /* + flush the pending event and replace it with the newly created + event...
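For reference, binlog_write_row() further down obtains its pending event through exactly this interface (the call is copied from this patch, using the Write_rows_log_event type tag):

      Rows_log_event* const ev=
        binlog_prepare_pending_rows_event(table, server_id, cols, colcnt,
                                          len, is_trans,
                                          static_cast<Write_rows_log_event*>(0));
      error= likely(ev != 0) ? ev->add_row_data(row_data, len)
                             : HA_ERR_OUT_OF_MEM;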
+ */ + if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(this, ev))) + { + delete ev; + return NULL; + } + + return ev; /* This is the new pending event */ + } + return pending; /* This is the current pending event */ +} + +#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION +/* + Instantiate the versions we need; we compile with + -fno-implicit-template. +*/ +template Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE*, uint32, MY_BITMAP const*, + my_size_t, my_size_t, bool, + Write_rows_log_event*); + +template Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE*, uint32, MY_BITMAP const*, + my_size_t colcnt, my_size_t, bool, + Delete_rows_log_event *); + +template Rows_log_event* +THD::binlog_prepare_pending_rows_event(TABLE*, uint32, MY_BITMAP const*, + my_size_t colcnt, my_size_t, bool, + Update_rows_log_event *); +#endif +static char const* +field_type_name(enum_field_types type) +{ + switch (type) + { + case MYSQL_TYPE_DECIMAL: + return "MYSQL_TYPE_DECIMAL"; + case MYSQL_TYPE_TINY: + return "MYSQL_TYPE_TINY"; + case MYSQL_TYPE_SHORT: + return "MYSQL_TYPE_SHORT"; + case MYSQL_TYPE_LONG: + return "MYSQL_TYPE_LONG"; + case MYSQL_TYPE_FLOAT: + return "MYSQL_TYPE_FLOAT"; + case MYSQL_TYPE_DOUBLE: + return "MYSQL_TYPE_DOUBLE"; + case MYSQL_TYPE_NULL: + return "MYSQL_TYPE_NULL"; + case MYSQL_TYPE_TIMESTAMP: + return "MYSQL_TYPE_TIMESTAMP"; + case MYSQL_TYPE_LONGLONG: + return "MYSQL_TYPE_LONGLONG"; + case MYSQL_TYPE_INT24: + return "MYSQL_TYPE_INT24"; + case MYSQL_TYPE_DATE: + return "MYSQL_TYPE_DATE"; + case MYSQL_TYPE_TIME: + return "MYSQL_TYPE_TIME"; + case MYSQL_TYPE_DATETIME: + return "MYSQL_TYPE_DATETIME"; + case MYSQL_TYPE_YEAR: + return "MYSQL_TYPE_YEAR"; + case MYSQL_TYPE_NEWDATE: + return "MYSQL_TYPE_NEWDATE"; + case MYSQL_TYPE_VARCHAR: + return "MYSQL_TYPE_VARCHAR"; + case MYSQL_TYPE_BIT: + return "MYSQL_TYPE_BIT"; + case MYSQL_TYPE_NEWDECIMAL: + return "MYSQL_TYPE_NEWDECIMAL"; + case MYSQL_TYPE_ENUM: + return "MYSQL_TYPE_ENUM"; + case MYSQL_TYPE_SET: + return "MYSQL_TYPE_SET"; + case MYSQL_TYPE_TINY_BLOB: + return "MYSQL_TYPE_TINY_BLOB"; + case MYSQL_TYPE_MEDIUM_BLOB: + return "MYSQL_TYPE_MEDIUM_BLOB"; + case MYSQL_TYPE_LONG_BLOB: + return "MYSQL_TYPE_LONG_BLOB"; + case MYSQL_TYPE_BLOB: + return "MYSQL_TYPE_BLOB"; + case MYSQL_TYPE_VAR_STRING: + return "MYSQL_TYPE_VAR_STRING"; + case MYSQL_TYPE_STRING: + return "MYSQL_TYPE_STRING"; + case MYSQL_TYPE_GEOMETRY: + return "MYSQL_TYPE_GEOMETRY"; + } + return "Unknown"; +} + +my_size_t THD::max_row_length_blob(TABLE *table, const byte *data) const +{ + my_size_t length= 0; + TABLE_SHARE *table_s= table->s; + uint* const beg= table_s->blob_field; + uint* const end= beg + table_s->blob_fields; + + for (uint *ptr= beg ; ptr != end ; ++ptr) + { + Field_blob* const blob= (Field_blob*) table->field[*ptr]; + length+= blob->get_length((const char *) (data + blob->offset())) + 2; + } + + return length; +} + +my_size_t THD::pack_row(TABLE *table, MY_BITMAP const* cols, byte *row_data, + const byte *record) const +{ + Field **p_field= table->field, *field= *p_field; + int n_null_bytes= table->s->null_bytes; + my_ptrdiff_t const offset= record - (byte*) table->record[0]; + + memcpy(row_data, record, n_null_bytes); + byte *ptr= row_data+n_null_bytes; + + for (int i= 0 ; field ; i++, p_field++, field= *p_field) + { + if (bitmap_is_set(cols,i)) + ptr= (byte*)field->pack((char *) ptr, field->ptr + offset); + } + + /* + my_ptrdiff_t is signed, size_t is unsigned. Assert that the + conversion will work correctly.
+ */ + DBUG_ASSERT(ptr - row_data >= 0); + return (static_cast<size_t>(ptr - row_data)); +} + +int THD::binlog_write_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + byte const *record) +{ + DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + + /* + Pack records into format for transfer. We are allocating more + memory than needed, but that doesn't matter. + */ + bool error= 0; + byte *row_data= table->write_row_record; + my_size_t const max_len= max_row_length(table, record); + + /* + * Allocate room for a row (if needed) + */ + if (!row_data) + { + if (!table->s->blob_fields) + { + /* multiply max_len by 2 so it can be used for update_row as well */ + table->write_row_record= (byte *) alloc_root(&table->mem_root, 2*max_len); + if (!table->write_row_record) + return HA_ERR_OUT_OF_MEM; + row_data= table->write_row_record; + } + else if (unlikely(!(row_data= (byte *) my_malloc(max_len, MYF(MY_WME))))) + return HA_ERR_OUT_OF_MEM; + } + my_size_t const len= pack_row(table, cols, row_data, record); + + Rows_log_event* const ev= + binlog_prepare_pending_rows_event(table, server_id, cols, colcnt, + len, is_trans, + static_cast<Write_rows_log_event*>(0)); + + /* add_row_data copies row_data to internal buffer */ + error= likely(ev != 0) ? ev->add_row_data(row_data,len) : HA_ERR_OUT_OF_MEM ; + + if (table->write_row_record == 0) + my_free((gptr) row_data, MYF(MY_WME)); + + return error; +} + +int THD::binlog_update_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + const byte *before_record, + const byte *after_record) +{ + DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + + bool error= 0; + my_size_t const before_maxlen = max_row_length(table, before_record); + my_size_t const after_maxlen = max_row_length(table, after_record); + + byte *row_data= table->write_row_record; + byte *before_row, *after_row; + if (row_data != 0) + { + before_row= row_data; + after_row= before_row + before_maxlen; + } + else + { + if (unlikely(!(row_data= (byte*)my_multi_malloc(MYF(MY_WME), + &before_row, before_maxlen, + &after_row, after_maxlen, + NULL)))) + return HA_ERR_OUT_OF_MEM; + } + + my_size_t const before_size= pack_row(table, cols, before_row, + before_record); + my_size_t const after_size= pack_row(table, cols, after_row, + after_record); + + Rows_log_event* const ev= + binlog_prepare_pending_rows_event(table, server_id, cols, colcnt, + before_size + after_size, is_trans, + static_cast<Update_rows_log_event*>(0)); + + error= (unlikely(!ev)) || ev->add_row_data(before_row, before_size) || + ev->add_row_data(after_row, after_size); + + if (!table->write_row_record) + { + /* add_row_data copies row_data to internal buffer */ + my_free((gptr)row_data, MYF(MY_WME)); + } + + return error; +} + +int THD::binlog_delete_row(TABLE* table, bool is_trans, + MY_BITMAP const* cols, my_size_t colcnt, + byte const *record) +{ + DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + + /* + Pack records into format for transfer. We are allocating more + memory than needed, but that doesn't matter. 
+ */ + bool error= 0; + my_size_t const max_len= max_row_length(table, record); + byte *row_data= table->write_row_record; + if (!row_data && unlikely(!(row_data= (byte*)my_malloc(max_len, MYF(MY_WME))))) + return HA_ERR_OUT_OF_MEM; + my_size_t const len= pack_row(table, cols, row_data, record); + + Rows_log_event* const ev= + binlog_prepare_pending_rows_event(table, server_id, cols, colcnt, + len, is_trans, + static_cast<Delete_rows_log_event*>(0)); + + error= (unlikely(!ev)) || ev->add_row_data(row_data, len); + + /* add_row_data copies row_data */ + if (table->write_row_record == 0) + my_free((gptr)row_data, MYF(MY_WME)); + + return error; +} + + +int THD::binlog_flush_pending_rows_event(bool stmt_end) +{ + DBUG_ENTER("THD::binlog_flush_pending_rows_event"); + if (!current_stmt_binlog_row_based || !mysql_bin_log.is_open()) + DBUG_RETURN(0); + + /* + Mark the event as the last event of a statement if the stmt_end + flag is set. + */ + int error= 0; + if (Rows_log_event *pending= binlog_get_pending_rows_event()) + { + if (stmt_end) + { + pending->set_flags(Rows_log_event::STMT_END_F); + pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; + } + + /* + We only bother to set the pending event if it is non-NULL. This + is essential for correctness, since there is not necessarily a + trx_data created for the thread if the pending event is NULL. + */ + error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0); + } + + DBUG_RETURN(error); +} + + +void THD::binlog_delete_pending_rows_event() +{ + if (Rows_log_event *pending= binlog_get_pending_rows_event()) + { + delete pending; + binlog_set_pending_rows_event(0); + } +} + +#endif /* HAVE_ROW_BASED_REPLICATION */ + +/* + Member function that will log a query, either row-based or + statement-based, depending on the value of the 'current_stmt_binlog_row_based' + flag and the value of the 'qtype' argument. + + This function should be called after all calls to the ha_*_row() + functions have been issued, but before tables are unlocked and + closed. + + RETURN VALUE + Error code, or 0 if no error. +*/ +int THD::binlog_query(THD::enum_binlog_query_type qtype, + char const *query, ulong query_len, + bool is_trans, bool suppress_use) +{ + DBUG_ENTER("THD::binlog_query"); + DBUG_ASSERT(query && mysql_bin_log.is_open()); + int error= binlog_flush_pending_rows_event(true); + switch (qtype) + { + case THD::MYSQL_QUERY_TYPE: + /* + Using this query type is a convenience hack, since we have been + moving back and forth between using RBR for replication of + system tables and not using it. + + Make sure to change check_table_current_stmt_binlog_row_based according + to how you treat this. + */ + case THD::ROW_QUERY_TYPE: + if (current_stmt_binlog_row_based) + DBUG_RETURN(binlog_flush_pending_rows_event(true)); + /* Otherwise, we fall through */ + case THD::STMT_QUERY_TYPE: + /* + Most callers of binlog_query() ignore the error code, assuming + that the statement will always be written to the binlog. In + case of error above, we therefore just continue and write the + statement to the binary log.
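A typical call site, taken from the open_unireg_entry() hunk earlier in this patch, ignores the return value exactly as described:

      thd->binlog_query(THD::STMT_QUERY_TYPE,
                        query, (ulong)(end-query), FALSE, FALSE);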
+ */ + { + Query_log_event qinfo(this, query, query_len, is_trans, suppress_use); + qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; + DBUG_RETURN(mysql_bin_log.write(&qinfo)); + } + break; + + case THD::QUERY_TYPE_COUNT: + default: + DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT); + } + DBUG_RETURN(0); +} + +#endif /* !defined(MYSQL_CLIENT) */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 048a0a49618..267faba49f8 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -21,19 +21,20 @@ #pragma interface /* gcc class implementation */ #endif -// TODO: create log.h and move all the log header stuff there +#include "log.h" +#include "rpl_rli.h" +#include "rpl_tblmap.h" class Query_log_event; class Load_log_event; class Slave_log_event; -class Format_description_log_event; class sp_rcontext; class sp_cache; +class Rows_log_event; enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE }; enum enum_ha_read_modes { RFIRST, RNEXT, RPREV, RLAST, RKEY, RNEXT_SAME }; enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE }; -enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN}; enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON, DELAY_KEY_WRITE_ALL }; @@ -50,118 +51,6 @@ extern const char **errmesg; #define TC_HEURISTIC_RECOVER_ROLLBACK 2 extern uint tc_heuristic_recover; -/* - Transaction Coordinator log - a base abstract class - for two different implementations -*/ -class TC_LOG -{ - public: - int using_heuristic_recover(); - TC_LOG() {} - virtual ~TC_LOG() {} - - virtual int open(const char *opt_name)=0; - virtual void close()=0; - virtual int log(THD *thd, my_xid xid)=0; - virtual void unlog(ulong cookie, my_xid xid)=0; -}; - -class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging -{ -public: - TC_LOG_DUMMY() {} /* Remove gcc warning */ - int open(const char *opt_name) { return 0; } - void close() { } - int log(THD *thd, my_xid xid) { return 1; } - void unlog(ulong cookie, my_xid xid) { } -}; - -#ifdef HAVE_MMAP -class TC_LOG_MMAP: public TC_LOG -{ - public: // only to keep Sun Forte on sol9x86 happy - typedef enum { - POOL, // page is in pool - ERROR, // last sync failed - DIRTY // new xids added since last sync - } PAGE_STATE; - - private: - typedef struct st_page { - struct st_page *next; // page a linked in a fifo queue - my_xid *start, *end; // usable area of a page - my_xid *ptr; // next xid will be written here - int size, free; // max and current number of free xid slots on the page - int waiters; // number of waiters on condition - PAGE_STATE state; // see above - pthread_mutex_t lock; // to access page data or control structure - pthread_cond_t cond; // to wait for a sync - } PAGE; - - char logname[FN_REFLEN]; - File fd; - my_off_t file_length; - uint npages, inited; - uchar *data; - struct st_page *pages, *syncing, *active, *pool, *pool_last; - /* - note that, e.g. LOCK_active is only used to protect - 'active' pointer, to protect the content of the active page - one has to use active->lock. 
- Same for LOCK_pool and LOCK_sync - */ - pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; - pthread_cond_t COND_pool, COND_active; - - public: - TC_LOG_MMAP(): inited(0) {} - int open(const char *opt_name); - void close(); - int log(THD *thd, my_xid xid); - void unlog(ulong cookie, my_xid xid); - int recover(); - - private: - void get_active_from_pool(); - int sync(); - int overflow(); -}; -#else -#define TC_LOG_MMAP TC_LOG_DUMMY -#endif - -extern TC_LOG *tc_log; -extern TC_LOG_MMAP tc_log_mmap; -extern TC_LOG_DUMMY tc_log_dummy; - -/* log info errors */ -#define LOG_INFO_EOF -1 -#define LOG_INFO_IO -2 -#define LOG_INFO_INVALID -3 -#define LOG_INFO_SEEK -4 -#define LOG_INFO_MEM -6 -#define LOG_INFO_FATAL -7 -#define LOG_INFO_IN_USE -8 - -/* bitmap to SQL_LOG::close() */ -#define LOG_CLOSE_INDEX 1 -#define LOG_CLOSE_TO_BE_OPENED 2 -#define LOG_CLOSE_STOP_EVENT 4 - -struct st_relay_log_info; - -typedef struct st_log_info -{ - char log_file_name[FN_REFLEN]; - my_off_t index_file_offset, index_file_start_offset; - my_off_t pos; - bool fatal; // if the purge happens to give us a negative offset - pthread_mutex_t lock; - st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);} - ~st_log_info() { pthread_mutex_destroy(&lock);} -} LOG_INFO; - typedef struct st_user_var_events { user_var_entry *user_var_event; @@ -174,188 +63,6 @@ typedef struct st_user_var_events #define RP_LOCK_LOG_IS_ALREADY_LOCKED 1 #define RP_FORCE_ROTATE 2 -class Log_event; - -/* - TODO split MYSQL_LOG into base MYSQL_LOG and - MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG - most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG - only (TC_LOG included) - - TODO use mmap instead of IO_CACHE for binlog - (mmap+fsync is two times faster than write+fsync) -*/ - -class MYSQL_LOG: public TC_LOG -{ - private: - /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ - pthread_mutex_t LOCK_log, LOCK_index; - pthread_mutex_t LOCK_prep_xids; - pthread_cond_t COND_prep_xids; - pthread_cond_t update_cond; - ulonglong bytes_written; - time_t last_time,query_start; - IO_CACHE log_file; - IO_CACHE index_file; - char *name; - char time_buff[20],db[NAME_LEN+1]; - char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN]; - /* - The max size before rotation (usable only if log_type == LOG_BIN: binary - logs and relay logs). - For a binlog, max_size should be max_binlog_size. - For a relay log, it should be max_relay_log_size if this is non-zero, - max_binlog_size otherwise. - max_size is set in init(), and dynamically changed (when one does SET - GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and - fix_max_relay_log_size). - */ - ulong max_size; - ulong prepared_xids; /* for tc log - number of xids to remember */ - volatile enum_log_type log_type; - enum cache_type io_cache_type; - // current file sequence number for load data infile binary logging - uint file_id; - uint open_count; // For replication - int readers_count; - bool write_error, inited; - bool need_start_event; - /* - no_auto_events means we don't want any of these automatic events : - Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't - want a Rotate_log event to be written to the relay log. When we start a - relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs. - In 5.0 it's 0 for relay logs too! - */ - bool no_auto_events; - friend class Log_event; - -public: - /* - These describe the log's format. This is used only for relay logs. 
- _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's - necessary to have 2 distinct objects, because the I/O thread may be reading - events in a different format from what the SQL thread is reading (consider - the case of a master which has been upgraded from 5.0 to 5.1 without doing - RESET MASTER, or from 4.x to 5.0). - */ - Format_description_log_event *description_event_for_exec, - *description_event_for_queue; - - MYSQL_LOG(); - /* - note that there's no destructor ~MYSQL_LOG() ! - The reason is that we don't want it to be automatically called - on exit() - but only during the correct shutdown process - */ - - int open(const char *opt_name); - void close(); - int log(THD *thd, my_xid xid); - void unlog(ulong cookie, my_xid xid); - int recover(IO_CACHE *log, Format_description_log_event *fdle); - void reset_bytes_written() - { - bytes_written = 0; - } - void harvest_bytes_written(ulonglong* counter) - { -#ifndef DBUG_OFF - char buf1[22],buf2[22]; -#endif - DBUG_ENTER("harvest_bytes_written"); - (*counter)+=bytes_written; - DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), - llstr(bytes_written,buf2))); - bytes_written=0; - DBUG_VOID_RETURN; - } - void set_max_size(ulong max_size_arg); - void signal_update(); - void wait_for_update(THD* thd, bool master_or_slave); - void set_need_start_event() { need_start_event = 1; } - void init(enum_log_type log_type_arg, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size); - void init_pthread_objects(); - void cleanup(); - bool open(const char *log_name, - enum_log_type log_type, - const char *new_name, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size, - bool null_created); - const char *generate_name(const char *log_name, const char *suffix, - bool strip_ext, char *buff); - /* simplified open_xxx wrappers for the gigantic open above */ - bool open_query_log(const char *log_name) - { - char buf[FN_REFLEN]; - return open(generate_name(log_name, ".log", 0, buf), - LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); - } - bool open_slow_log(const char *log_name) - { - char buf[FN_REFLEN]; - return open(generate_name(log_name, "-slow.log", 0, buf), - LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); - } - bool open_index_file(const char *index_file_name_arg, - const char *log_name); - void new_file(bool need_lock); - bool write(THD *thd, enum enum_server_command command, - const char *format,...); - bool write(THD *thd, const char *query, uint query_length, - time_t query_start=0); - bool write(Log_event* event_info); // binary log write - bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event); - - void start_union_events(THD *thd); - void stop_union_events(THD *thd); - bool is_query_in_union(THD *thd, query_id_t query_id_param); - - /* - v stands for vector - invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0) - */ - bool appendv(const char* buf,uint len,...); - bool append(Log_event* ev); - - int generate_new_name(char *new_name,const char *old_name); - void make_log_name(char* buf, const char* log_ident); - bool is_active(const char* log_file_name); - int update_log_index(LOG_INFO* linfo, bool need_update_threads); - void rotate_and_purge(uint flags); - bool flush_and_sync(); - int purge_logs(const char *to_log, bool included, - bool need_mutex, bool need_update_threads, - ulonglong *decrease_log_space); - int purge_logs_before_date(time_t purge_time); - int purge_first_log(struct st_relay_log_info* rli, bool included); - bool reset_logs(THD* thd); - void 
close(uint exiting); - - // iterating through the log index file - int find_log_pos(LOG_INFO* linfo, const char* log_name, - bool need_mutex); - int find_next_log(LOG_INFO* linfo, bool need_mutex); - int get_current_log(LOG_INFO* linfo); - uint next_file_id(); - inline bool is_open() { return log_type != LOG_CLOSED; } - inline char* get_index_fname() { return index_file_name;} - inline char* get_log_fname() { return log_file_name; } - inline char* get_name() { return name; } - inline pthread_mutex_t* get_log_lock() { return &LOCK_log; } - inline IO_CACHE* get_log_file() { return &log_file; } - - inline void lock_index() { pthread_mutex_lock(&LOCK_index);} - inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);} - inline IO_CACHE *get_index_file() { return &index_file;} - inline uint32 get_open_count() { return open_count; } -}; - - typedef struct st_copy_info { ha_rows records; ha_rows deleted; @@ -409,11 +116,13 @@ public: List<key_part_spec> columns; const char *name; bool generated; + LEX_STRING *parser_name; Key(enum Keytype type_par, const char *name_arg, enum ha_key_alg alg_par, - bool generated_arg, List<key_part_spec> &cols) + bool generated_arg, List<key_part_spec> &cols, + LEX_STRING *parser_arg= 0) :type(type_par), algorithm(alg_par), columns(cols), name(name_arg), - generated(generated_arg) + generated(generated_arg), parser_name(parser_arg) {} ~Key() {} /* Equality comparison of keys (ignoring name) */ @@ -460,29 +169,9 @@ public: #include "sql_lex.h" /* Must be here */ -/* Needed to be able to have an I_List of char* strings in mysqld.cc. */ - -class i_string: public ilink -{ -public: - char* ptr; - i_string():ptr(0) { } - i_string(char* s) : ptr(s) {} -}; - -/* needed for linked list of two strings for replicate-rewrite-db */ -class i_string_pair: public ilink -{ -public: - char* key; - char* val; - i_string_pair():key(0),val(0) { } - i_string_pair(char* key_arg, char* val_arg) : key(key_arg),val(val_arg) {} -}; - - class delayed_insert; class select_result; +class Time_zone; #define THD_SENTRY_MAGIC 0xfeedd1ff #define THD_SENTRY_GONE 0xdeadbeef @@ -524,7 +213,7 @@ struct system_variables ulong read_rnd_buff_size; ulong div_precincrement; ulong sortbuff_size; - ulong table_type; + handlerton *table_type; ulong tmp_table_size; ulong tx_isolation; ulong completion_type; @@ -557,16 +246,18 @@ struct system_variables ulong sync_replication_slave_id; ulong sync_replication_timeout; #endif /* HAVE_REPLICATION */ -#ifdef HAVE_INNOBASE_DB my_bool innodb_table_locks; my_bool innodb_support_xa; -#endif /* HAVE_INNOBASE_DB */ -#ifdef HAVE_NDBCLUSTER_DB - ulong ndb_autoincrement_prefetch_sz; my_bool ndb_force_send; my_bool ndb_use_exact_count; my_bool ndb_use_transactions; -#endif /* HAVE_NDBCLUSTER_DB */ + my_bool ndb_index_stat_enable; + ulong ndb_autoincrement_prefetch_sz; + ulong ndb_index_stat_cache_entries; + ulong ndb_index_stat_update_freq; + ulong binlog_format; // binlog format for this thd (see enum_binlog_format) + + my_bool old_alter_table; my_bool old_passwords; /* Only charset part of these variables is sensible */ @@ -626,6 +317,7 @@ typedef struct system_status_var ulong net_big_packet_count; ulong opened_tables; + ulong opened_shares; ulong select_full_join_count; ulong select_full_range_join_count; ulong select_range_count; @@ -655,6 +347,8 @@ typedef struct system_status_var #define last_system_status_var com_stmt_close +#ifdef MYSQL_SERVER + void free_tmp_table(THD *thd, TABLE *entry); @@ -665,7 +359,6 @@ void free_tmp_table(THD *thd, TABLE *entry); 
 #define INIT_ARENA_DBUG_INFO
 #endif
 
-
 class Query_arena
 {
 public:
@@ -779,8 +472,15 @@ public:
 
 /*
- if set_query_id=1, we set field->query_id for all fields. In that case field list can not contain duplicates.
+ 0: Means query_id is not set and no indicator of the fields used is
+ passed to the handler
+ 1: Means query_id is set for fields in the list and the bit in the
+ read set is set to inform the handler that the field is to be read
+ 2: Means query_id is set for fields in the list and the bit in the
+ write set is set to inform the handler that it needs to update this
+ field in write_row and update_row
 */
- bool set_query_id;
+ ulong set_query_id;
 LEX_STRING name; /* name for named prepared statements */
 LEX *lex; // parse tree descriptor
@@ -1094,6 +794,9 @@ class THD :public Statement,
 public Open_tables_state
 {
 public:
+ /* Used to execute base64 coded binlog events in MySQL server */
+ RELAY_LOG_INFO* rli_fake;
+
 /*
 Constant for THD::where initialization in the beginning of every query.
@@ -1201,12 +904,97 @@ public:
 /* container for handler's private per-connection data */
 void *ha_data[MAX_HA];
+
+#ifdef HAVE_ROW_BASED_REPLICATION
+#ifndef MYSQL_CLIENT
+
+ /*
+ Public interface to write rows to the binlog
+ */
+ int binlog_write_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *buf);
+ int binlog_delete_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *buf);
+ int binlog_update_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *old_data, const byte *new_data);
+
+ void set_server_id(uint32 sid) { server_id = sid; }
+
+ /*
+ Member functions to handle pending event for row-level logging.
+ */
+ template <class RowsEventT> Rows_log_event*
+ binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
+ MY_BITMAP const* cols,
+ my_size_t colcnt,
+ my_size_t needed,
+ bool is_transactional,
+ RowsEventT* hint);
+ Rows_log_event* binlog_get_pending_rows_event() const;
+ void binlog_set_pending_rows_event(Rows_log_event* ev);
+ int binlog_setup_trx_data();
+
+ my_size_t max_row_length_blob(TABLE* table, const byte *data) const;
+ my_size_t max_row_length(TABLE* table, const byte *data) const
+ {
+ TABLE_SHARE *table_s= table->s;
+ my_size_t length= table_s->reclength + 2 * table_s->fields;
+ if (table_s->blob_fields == 0)
+ return length;
+
+ return (length+max_row_length_blob(table,data));
+ }
+
+ my_size_t pack_row(TABLE* table, MY_BITMAP const* cols, byte *row_data,
+ const byte *data) const;
+
+ int binlog_flush_pending_rows_event(bool stmt_end);
+ void binlog_delete_pending_rows_event();
+
+#endif
+#endif /* HAVE_ROW_BASED_REPLICATION */
+#ifndef MYSQL_CLIENT
+ enum enum_binlog_query_type {
+ /*
+ The query can be logged row-based or statement-based
+ */
+ ROW_QUERY_TYPE,
+
+ /*
+ The query has to be logged statement-based
+ */
+ STMT_QUERY_TYPE,
+
+ /*
+ The query represents a change to a table in the "mysql"
+ database and is currently mapped to ROW_QUERY_TYPE.
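The max_row_length() member above gives a worst-case buffer size for a packed row: the share's record length plus two length bytes per field, plus the actual blob payloads when the table has blob fields. The same arithmetic as a stand-alone sketch (generic types, not the server's TABLE structures):

    #include <stddef.h>
    /* Illustrative analogue of THD::max_row_length(): fixed part plus
       two bytes per field, plus actual blob payload sizes. */
    size_t max_row_length(size_t reclength, size_t fields,
                          const size_t *blob_lengths, size_t blob_count)
    {
      size_t length= reclength + 2 * fields;
      for (size_t i= 0; i < blob_count; i++)
        length+= blob_lengths[i];    /* zero iterations if no blob fields */
      return length;
    }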
+ */
+ MYSQL_QUERY_TYPE,
+ QUERY_TYPE_COUNT
+ };
+
+ int binlog_query(enum_binlog_query_type qtype,
+ char const *query, ulong query_len,
+ bool is_trans, bool suppress_use);
+#endif
+
+public:
+
 struct st_transactions {
 SAVEPOINT *savepoints;
 THD_TRANS all; // Trans since BEGIN WORK
 THD_TRANS stmt; // Trans for current statement
 bool on; // see ha_enable_transaction()
+ XID xid; // transaction identifier
+ enum xa_states xa_state; // used by external XA only
 XID_STATE xid_state;
+#ifdef HAVE_ROW_BASED_REPLICATION
+ Rows_log_event *m_pending_rows_event;
+#endif
+
 /*
 Tables changed in transaction (that must be invalidated in query cache).
 List contain only transactional tables, that not invalidated in query
@@ -1338,6 +1126,8 @@ public:
 char scramble[SCRAMBLE_LENGTH+1];
 
 bool slave_thread, one_shot_set;
+ /* tells if current statement should binlog row-based(1) or stmt-based(0) */
+ bool current_stmt_binlog_row_based;
 bool locked, some_tables_deleted;
 bool last_cuted_field;
 bool no_errors, password, is_fatal_error;
@@ -1592,6 +1382,15 @@ public:
 void restore_sub_statement_state(Sub_statement_state *backup);
 void set_n_backup_active_arena(Query_arena *set, Query_arena *backup);
 void restore_active_arena(Query_arena *set, Query_arena *backup);
+ inline void set_current_stmt_binlog_row_based_if_mixed()
+ {
+ if (variables.binlog_format == BINLOG_FORMAT_MIXED)
+ current_stmt_binlog_row_based= 1;
+ }
+ inline void reset_current_stmt_binlog_row_based()
+ {
+ current_stmt_binlog_row_based= test(variables.binlog_format == BINLOG_FORMAT_ROW);
+ }
 };
 
@@ -1605,6 +1404,7 @@ public:
 #define SYSTEM_THREAD_DELAYED_INSERT 1
 #define SYSTEM_THREAD_SLAVE_IO 2
 #define SYSTEM_THREAD_SLAVE_SQL 4
+#define SYSTEM_THREAD_NDBCLUSTER_BINLOG 8
 
 /*
 Used to hold information about file and file structure in exchainge
@@ -1770,6 +1570,7 @@ class select_create: public select_insert {
 HA_CREATE_INFO *create_info;
 MYSQL_LOCK *lock;
 Field **field;
+ bool create_table_written;
 public:
 select_create (TABLE_LIST *table,
 HA_CREATE_INFO *create_info_par,
@@ -1778,9 +1579,11 @@ public:
 List<Item> &select_fields,enum_duplicates duplic, bool ignore)
 :select_insert (NULL, NULL, &select_fields, 0, 0, duplic, ignore), create_table(table),
 extra_fields(&fields_par),keys(&keys_par), create_info(create_info_par),
- lock(0)
+ lock(0), create_table_written(FALSE)
 {}
 int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
+
+ void binlog_show_create_table();
 void store_values(List<Item> &values);
 void send_error(uint errcode,const char *err);
 bool send_eof();
@@ -2128,3 +1931,5 @@ public:
 
 /* Functions in sql_class.cc */
 void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var);
+
+#endif /* MYSQL_SERVER */
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 4caa0076c60..3d035359b6f 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -20,6 +20,7 @@
 #include "mysql_priv.h"
 #include <mysys_err.h>
 #include "sp.h"
+#include "event.h"
 #include <my_dir.h>
 #include <m_ctype.h>
 #ifdef __WIN__
@@ -38,6 +39,107 @@ static long mysql_rm_known_files(THD *thd, MY_DIR *dirp,
 static long mysql_rm_arc_files(THD *thd, MY_DIR *dirp, const char *org_path);
 static my_bool rm_dir_w_symlink(const char *org_path, my_bool send_error);
+
+
+/* Database lock hash */
+HASH lock_db_cache;
+pthread_mutex_t LOCK_lock_db;
+int creating_database= 0; // how many database locks are made
+
+
+/* Structure for database lock */
+typedef struct my_dblock_st
+{
+ char *name; /* Database name */
+ uint name_length; /* Database name length */
+} my_dblock_t;
+
+
+/*
+ lock_db key.
+*/
+
+static byte* lock_db_get_key(my_dblock_t *ptr, uint *length,
+ my_bool not_used __attribute__((unused)))
+{
+ *length= ptr->name_length;
+ return (byte*) ptr->name;
+}
+
+
+/*
+ Free lock_db hash element.
+*/
+
+static void lock_db_free_element(void *ptr)
+{
+ my_free((gptr) ptr, MYF(0));
+}
+
+
+/*
+ Put a database lock entry into the hash.
+
+ DESCRIPTION
+ Insert a database lock entry into hash.
+ LOCK_lock_db must be previously locked.
+
+ RETURN VALUES
+ 0 on success.
+ 1 on error.
+*/
+
+static my_bool lock_db_insert(const char *dbname, uint length)
+{
+ my_dblock_t *opt;
+ my_bool error= 0;
+ DBUG_ENTER("lock_db_insert");
+
+ safe_mutex_assert_owner(&LOCK_lock_db);
+
+ if (!(opt= (my_dblock_t*) hash_search(&lock_db_cache,
+ (byte*) dbname, length)))
+ {
+ /* Db is not in the hash, insert it */
+ char *tmp_name;
+ if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
+ &opt, (uint) sizeof(*opt), &tmp_name, length+1,
+ NullS))
+ {
+ error= 1;
+ goto end;
+ }
+
+ opt->name= tmp_name;
+ strmov(opt->name, dbname);
+ opt->name_length= length;
+
+ if ((error= my_hash_insert(&lock_db_cache, (byte*) opt)))
+ {
+ my_free((gptr) opt, MYF(0));
+ goto end;
+ }
+ }
+
+end:
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Delete a database lock entry from hash.
+*/
+
+void lock_db_delete(const char *name, uint length)
+{
+ my_dblock_t *opt;
+ safe_mutex_assert_owner(&LOCK_lock_db);
+ opt= (my_dblock_t *)hash_search(&lock_db_cache, (const byte*) name, length);
+ DBUG_ASSERT(opt != NULL);
+ hash_delete(&lock_db_cache, (byte*) opt);
+}
+
+
 /* Database options hash */
 static HASH dboptions;
 static my_bool dboptions_init= 0;
@@ -90,10 +192,10 @@ static void free_dbopt(void *dbopt)
 
 /*
- Initialize database option hash
+ Initialize database option hash and locked database hash.
 
 SYNOPSIS
- my_dbopt_init()
+ my_database_names_init()
 
 NOTES
 Must be called before any other database function is called.
@@ -103,7 +205,7 @@ static void free_dbopt(void *dbopt)
 1 Fatal error
 */
 
-bool my_dbopt_init(void)
+bool my_database_names_init(void)
 {
 bool error= 0;
 (void) my_rwlock_init(&LOCK_dboptions, NULL);
@@ -113,27 +215,38 @@ bool my_dbopt_init(void)
 error= hash_init(&dboptions, lower_case_table_names ?
 &my_charset_bin : system_charset_info,
 32, 0, 0, (hash_get_key) dboptions_get_key,
- free_dbopt,0);
+ free_dbopt,0) ||
+ hash_init(&lock_db_cache, lower_case_table_names ?
+ &my_charset_bin : system_charset_info,
+ 32, 0, 0, (hash_get_key) lock_db_get_key,
+ lock_db_free_element,0);
+
 }
 return error;
 }
 
+
 /*
- Free database option hash.
+ Free database option hash and locked databases hash.
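lock_db_insert() and lock_db_delete() implement a small name-keyed registry protected by LOCK_lock_db; create/rename code parks database names there so concurrent DDL can wait on them. The same pattern as a stand-alone sketch (std::set instead of the server's HASH; the caller holds the mutex, as safe_mutex_assert_owner() enforces above):

    #include <pthread.h>
    #include <set>
    #include <string>

    static pthread_mutex_t LOCK_lock_db= PTHREAD_MUTEX_INITIALIZER;
    static std::set<std::string> lock_db_cache;

    /* Caller must hold LOCK_lock_db. Inserting a name that is already
       present is a no-op, mirroring the hash_search() check above. */
    static bool lock_db_insert(const std::string &dbname)
    {
      lock_db_cache.insert(dbname);
      return false;                       /* 0 on success */
    }

    /* Caller must hold LOCK_lock_db; the entry is expected to exist. */
    static void lock_db_delete(const std::string &dbname)
    {
      lock_db_cache.erase(dbname);
    }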
*/ -void my_dbopt_free(void) +void my_database_names_free(void) { if (dboptions_init) { dboptions_init= 0; hash_free(&dboptions); (void) rwlock_destroy(&LOCK_dboptions); + hash_free(&lock_db_cache); } } +/* + Cleanup cached options +*/ + void my_dbopt_cleanup(void) { rw_wrlock(&LOCK_dboptions); @@ -272,7 +385,7 @@ static bool write_db_opt(THD *thd, const char *path, HA_CREATE_INFO *create) if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0) { ulong length; - length= (ulong) (strxnmov(buf, sizeof(buf), "default-character-set=", + length= (ulong) (strxnmov(buf, sizeof(buf)-1, "default-character-set=", create->default_table_charset->csname, "\ndefault-collation=", create->default_table_charset->name, @@ -411,6 +524,7 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info, bool silent) { char path[FN_REFLEN+16]; + char tmp_query[FN_REFLEN+16]; long result= 1; int error= 0; MY_STAT stat_info; @@ -435,8 +549,7 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info, } /* Check directory */ - strxmov(path, mysql_data_home, "/", db, NullS); - path_len= unpack_dirname(path,path); // Convert if not unix + path_len= build_table_filename(path, sizeof(path), db, "", ""); path[path_len-1]= 0; // Remove last '/' from path if (my_stat(path,&stat_info,MYF(0))) @@ -497,15 +610,20 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create_info, if (!thd->query) // Only in replication { - query= path; - query_length= (uint) (strxmov(path,"create database `", db, "`", NullS) - - path); + query= tmp_query; + query_length= (uint) (strxmov(tmp_query,"create database `", + db, "`", NullS) - tmp_query); } else { query= thd->query; query_length= thd->query_length; } + + ha_binlog_log_query(thd, 0, LOGCOM_CREATE_DB, + query, query_length, + db, ""); + if (mysql_bin_log.is_open()) { Query_log_event qinfo(thd, query, query_length, 0, @@ -559,9 +677,12 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) if ((error=wait_if_global_read_lock(thd,0,1))) goto exit2; - /* Check directory */ - strxmov(path, mysql_data_home, "/", db, "/", MY_DB_OPT_FILE, NullS); - fn_format(path, path, "", "", MYF(MY_UNPACK_FILENAME)); + /* + Recreate db options file: /dbpath/.db.opt + We pass MY_DB_OPT_FILE as "extension" to avoid + "table name to file name" encoding. 
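build_table_filename() replaces the sprintf()/unpack_dirname() pairs used before this patch. Passing an empty table name yields the database directory, and passing a literal file name in the "extension" argument bypasses the table-name-to-file-name encoding, which is exactly how the db.opt path is built here. The recurring call pattern, condensed (names as used throughout this patch):

    char path[FN_REFLEN+16];
    uint length;
    /* database directory, e.g. <datadir>/<db>/ */
    length= build_table_filename(path, sizeof(path), db, "", "");
    /* options file, with MY_DB_OPT_FILE passed as the "extension"
       so the name is not run through tablename encoding */
    build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE);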
+ */
+ build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE);
 if ((error=write_db_opt(thd, path, create_info)))
 goto exit;
@@ -577,6 +698,10 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info)
 thd->variables.collation_database= thd->db_charset;
 }
 
+ ha_binlog_log_query(thd, 0, LOGCOM_ALTER_DB,
+ thd->query, thd->query_length,
+ db, "");
+
 if (mysql_bin_log.is_open())
 {
 Query_log_event qinfo(thd, thd->query, thd->query_length, 0,
@@ -639,8 +764,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
 goto exit2;
 }
 
- (void) sprintf(path,"%s/%s",mysql_data_home,db);
- length= unpack_dirname(path,path); // Convert if not unix
+ length= build_table_filename(path, sizeof(path), db, "", "");
 strmov(path+length, MY_DB_OPT_FILE); // Append db option file name
 del_dbopt(path); // Remove dboption hash entry
 path[length]= '\0'; // Remove file name
@@ -747,6 +871,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
 
exit:
 (void)sp_drop_db_routines(thd, db); /* QQ Ignore errors for now */
+ (void)evex_drop_db_events(thd, db); /* QQ Ignore errors for now */
 start_waiting_global_read_lock(thd);
 /*
 If this database was the client's selected database, we silently change the
@@ -862,7 +987,8 @@ static long mysql_rm_known_files(THD *thd, MY_DIR *dirp, const char *db,
 found_other_files++;
 continue;
 }
- extension= fn_ext(file->name);
+ if (!(extension= strrchr(file->name, '.')))
+ extension= strend(file->name);
 if (find_type(extension, &deletable_extentions,1+2) <= 0)
 {
 if (find_type(extension, ha_known_exts(),1+2) <= 0)
@@ -880,7 +1006,9 @@ static long mysql_rm_known_files(THD *thd, MY_DIR *dirp, const char *db,
 if (!table_list)
 goto err;
 table_list->db= (char*) (table_list+1);
- strmov(table_list->table_name= strmov(table_list->db,db)+1, file->name);
+ table_list->table_name= strmov(table_list->db, db) + 1;
+ VOID(filename_to_tablename(file->name, table_list->table_name,
+ strlen(file->name) + 1));
 table_list->alias= table_list->table_name; // If lower_case_table_names=2
 /* Link into list */
 (*tot_list_next)= table_list;
@@ -1160,16 +1288,15 @@ bool mysql_change_db(THD *thd, const char *name, bool no_access_check)
 sctx->priv_user,
 sctx->priv_host,
 dbname);
- mysql_log.write(thd, COM_INIT_DB, ER(ER_DBACCESS_DENIED_ERROR),
- sctx->priv_user, sctx->priv_host, dbname);
+ general_log_print(thd, COM_INIT_DB, ER(ER_DBACCESS_DENIED_ERROR),
+ sctx->priv_user, sctx->priv_host, dbname);
 if (!(thd->slave_thread))
 my_free(dbname,MYF(0));
 DBUG_RETURN(1);
 }
 }
 #endif
- (void) sprintf(path,"%s/%s",mysql_data_home,dbname);
- length=unpack_dirname(path,path); // Convert if not unix
+ length= build_table_filename(path, sizeof(path), dbname, "", "");
 if (length && path[length-1] == FN_LIBCHAR)
 path[length-1]=0; // remove ending '\'
 if (my_access(path,F_OK))
@@ -1214,3 +1341,312 @@ end:
 }
 DBUG_RETURN(0);
 }
+
+
+static int
+lock_databases(THD *thd, const char *db1, uint length1,
+ const char *db2, uint length2)
+{
+ pthread_mutex_lock(&LOCK_lock_db);
+ while (!thd->killed &&
+ (hash_search(&lock_db_cache,(byte*) db1, length1) ||
+ hash_search(&lock_db_cache,(byte*) db2, length2)))
+ {
+ wait_for_condition(thd, &LOCK_lock_db, &COND_refresh);
+ pthread_mutex_lock(&LOCK_lock_db);
+ }
+
+ if (thd->killed)
+ {
+ pthread_mutex_unlock(&LOCK_lock_db);
+ return 1;
+ }
+
+ lock_db_insert(db1, length1);
+ lock_db_insert(db2, length2);
+ creating_database++;
+
+ /*
+ Wait if a concurrent thread is creating a table at the same time.
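wait_for_condition() returns with the mutex released, which is why the loop above re-takes LOCK_lock_db before re-testing the predicate. Stripped of server specifics, this is the classic condition-variable wait; a compilable sketch (plain pthread_cond_wait(), which keeps the mutex locked on return, so no explicit re-lock is needed):

    #include <pthread.h>

    static pthread_mutex_t lock= PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond= PTHREAD_COND_INITIALIZER;
    static int busy= 1;                    /* stand-in for the hash lookups */

    void wait_until_free(void)
    {
      pthread_mutex_lock(&lock);
      while (busy)                         /* always re-check after wakeup */
        pthread_cond_wait(&cond, &lock);   /* atomically unlocks and waits */
      pthread_mutex_unlock(&lock);
    }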
+ The assumption here is that it will not take too long until
+ there is a point in time when a table is not created.
+ */
+
+ while (!thd->killed && creating_table)
+ {
+ wait_for_condition(thd, &LOCK_lock_db, &COND_refresh);
+ pthread_mutex_lock(&LOCK_lock_db);
+ }
+
+ if (thd->killed)
+ {
+ lock_db_delete(db1, length1);
+ lock_db_delete(db2, length2);
+ creating_database--;
+ pthread_mutex_unlock(&LOCK_lock_db);
+ pthread_cond_signal(&COND_refresh);
+ return(1);
+ }
+
+ /*
+ We can unlock now as the hash will protect against anyone creating a table
+ in the databases we are using
+ */
+ pthread_mutex_unlock(&LOCK_lock_db);
+ return 0;
+}
+
+
+/*
+ Rename database.
+
+ SYNOPSIS
+ mysql_rename_db()
+ thd Thread handler
+ olddb Old database name
+ newdb New database name
+
+ DESCRIPTION
+ This function is invoked whenever a RENAME DATABASE query is executed:
+
+ RENAME DATABASE 'olddb' TO 'newdb'.
+
+ NOTES
+
+ If we have managed to rename (move) tables to the new database
+ but something failed on a later step, then we store the
+ RENAME DATABASE event in the log. mysql_rename_db() is atomic in
+ the sense that it will rename all or none of the tables.
+
+ TODO:
+ - Better trigger, stored procedure, event, grant handling,
+ see the comments below.
+ NOTE: It's probably a good idea to call wait_if_global_read_lock()
+ once in mysql_rename_db(), instead of locking inside all
+ the required functions for renaming triggers, SP, events, grants, etc.
+
+ RETURN VALUES
+ 0 ok
+ 1 error
+*/
+
+
+bool mysql_rename_db(THD *thd, LEX_STRING *old_db, LEX_STRING *new_db)
+{
+ int error= 0, change_to_newdb= 0;
+ char path[FN_REFLEN+16];
+ uint length;
+ HA_CREATE_INFO create_info;
+ MY_DIR *dirp;
+ TABLE_LIST *table_list;
+ SELECT_LEX *sl= thd->lex->current_select;
+ DBUG_ENTER("mysql_rename_db");
+
+ if (lock_databases(thd, old_db->str, old_db->length,
+ new_db->str, new_db->length))
+ return 1;
+
+ /*
+ Let's remember if we should do "USE newdb" afterwards.
+ thd->db will be cleared in mysql_rename_db()
+ */
+ if (thd->db && !strcmp(thd->db, old_db->str))
+ change_to_newdb= 1;
+
+ build_table_filename(path, sizeof(path)-1, old_db->str, "", MY_DB_OPT_FILE);
+ if ((load_db_opt(thd, path, &create_info)))
+ create_info.default_table_charset= thd->variables.collation_server;
+
+ length= build_table_filename(path, sizeof(path)-1, old_db->str, "", "");
+ if (length && path[length-1] == FN_LIBCHAR)
+ path[length-1]=0; // remove ending '\'
+ if ((error= my_access(path,F_OK)))
+ {
+ my_error(ER_BAD_DB_ERROR, MYF(0), old_db->str);
+ goto exit;
+ }
+
+ /* Step1: Create the new database */
+ if ((error= mysql_create_db(thd, new_db->str, &create_info, 1)))
+ goto exit;
+
+ /* Step2: Move tables to the new database */
+ if ((dirp = my_dir(path,MYF(MY_DONT_SORT))))
+ {
+ uint nfiles= (uint) dirp->number_off_files;
+ for (uint idx=0 ; idx < nfiles && !thd->killed ; idx++)
+ {
+ FILEINFO *file= dirp->dir_entry + idx;
+ char *extension, tname[FN_REFLEN];
+ LEX_STRING table_str;
+ DBUG_PRINT("info",("Examining: %s", file->name));
+
+ /* skipping non-FRM files */
+ if (my_strcasecmp(files_charset_info,
+ (extension= fn_rext(file->name)), reg_ext))
+ continue;
+
+ /* A frm file found, add the table info to the rename list */
+ *extension= '\0';
+
+ table_str.length= filename_to_tablename(file->name,
+ tname, sizeof(tname)-1);
+ table_str.str= sql_memdup(tname, table_str.length + 1);
+ Table_ident *old_ident= new Table_ident(thd, *old_db, table_str, 0);
+ Table_ident *new_ident= new Table_ident(thd, *new_db, table_str, 0);
+ if (!old_ident || !new_ident ||
+ !sl->add_table_to_list(thd, old_ident, NULL,
+ TL_OPTION_UPDATING, TL_IGNORE) ||
+ !sl->add_table_to_list(thd, new_ident, NULL,
+ TL_OPTION_UPDATING, TL_IGNORE))
+ {
+ error= 1;
+ my_dirend(dirp);
+ goto exit;
+ }
+ }
+ my_dirend(dirp);
+ }
+
+ if ((table_list= thd->lex->query_tables) &&
+ (error= mysql_rename_tables(thd, table_list, 1)))
+ {
+ /*
+ Failed to move all tables from the old database to the new one.
+ In the best case mysql_rename_tables() moved all tables back to the old
+ database. In the worst case mysql_rename_tables() moved some tables
+ to the new database, then failed, then started to move the tables back, and
+ then failed again. In this situation we have some tables in the
+ old database and some tables in the new database.
+ Let's delete the option file, and then the new database directory.
+ If some tables were left in the new directory, rmdir() will fail.
+ It guarantees we never lose any tables.
+ */
+ build_table_filename(path, sizeof(path)-1, new_db->str,"",MY_DB_OPT_FILE);
+ my_delete(path, MYF(MY_WME));
+ length= build_table_filename(path, sizeof(path)-1, new_db->str, "", "");
+ if (length && path[length-1] == FN_LIBCHAR)
+ path[length-1]=0; // remove ending '\'
+ rmdir(path);
+ goto exit;
+ }
+
+
+ /*
+ Step3: move all remaining files to the new db's directory.
+ Skip db opt file: it's been created by mysql_create_db() in
+ the new directory, and will be dropped by mysql_rm_db() in the old one.
+ Trigger TRN and TRG files are moved as regular files at the moment,
+ without any special treatment.
+
+ Triggers without explicit database qualifiers in table names work fine:
+ use d1;
+ create trigger trg1 before insert on t2 for each row set @a:=1;
+ rename database d1 to d2;
+
+ TODO: Triggers, having the renamed database explicitly written
+ in the table qualifiers.
+ 1.
when the same database is renamed:
+ create trigger d1.trg1 before insert on d1.t1 for each row set @a:=1;
+ rename database d1 to d2;
+ Problem: After database renaming, the trigger's body
+ still points to the old database d1.
+ 2. when another database is renamed:
+ create trigger d3.trg1 before insert on d3.t1 for each row
+ insert into d1.t1 values (...);
+ rename database d1 to d2;
+ Problem: After renaming d1 to d2, the trigger's body
+ in the database d3 still points to database d1.
+ */
+
+ if ((dirp = my_dir(path,MYF(MY_DONT_SORT))))
+ {
+ uint nfiles= (uint) dirp->number_off_files;
+ for (uint idx=0 ; idx < nfiles ; idx++)
+ {
+ FILEINFO *file= dirp->dir_entry + idx;
+ char oldname[FN_REFLEN], newname[FN_REFLEN];
+ DBUG_PRINT("info",("Examining: %s", file->name));
+
+ /* skipping . and .. and MY_DB_OPT_FILE */
+ if ((file->name[0] == '.' &&
+ (!file->name[1] || (file->name[1] == '.' && !file->name[2]))) ||
+ !my_strcasecmp(files_charset_info, file->name, MY_DB_OPT_FILE))
+ continue;
+
+ /* pass empty file name, and file->name as extension to avoid encoding */
+ build_table_filename(oldname, sizeof(oldname)-1,
+ old_db->str, "", file->name);
+ build_table_filename(newname, sizeof(newname)-1,
+ new_db->str, "", file->name);
+ my_rename(oldname, newname, MYF(MY_WME));
+ }
+ my_dirend(dirp);
+ }
+
+ /*
+ Step4: TODO: moving stored procedures in the 'proc' system table
+ We need a new function: sp_move_db_routines(thd, olddb, newdb)
+ Which will basically have the same effect as:
+ UPDATE proc SET db='newdb' WHERE db='olddb'
+ Note, for 5.0 to 5.1 upgrade purposes we don't really need it.
+
+ The biggest problem here is that we can't have a lock on LOCK_open() while
+ calling open_table() for 'proc'.
+
+ Two solutions:
+ - Start by opening the 'event' and 'proc' (and other) tables for write
+ even before creating the 'to' database. (This will have the nice
+ effect of blocking another 'rename database' while the lock is active).
+ - Use the solution "Disable create of new tables during lock table"
+
+ For an example of how to read through all rows, see:
+ sql_help.cc::search_topics()
+ */
+
+ /*
+ Step5: TODO: moving events in the 'event' system table
+ We need a new function evex_move_db_events(thd, olddb, newdb)
+ Which will have the same effect as:
+ UPDATE event SET db='newdb' WHERE db='olddb'
+ Note, for 5.0 to 5.1 upgrade purposes we don't really need it.
+ */
+
+ /*
+ Step6: TODO: moving grants in the 'db', 'tables_priv', 'columns_priv'.
+ Update each grant table, doing the same with:
+ UPDATE system_table SET db='newdb' WHERE db='olddb'
+ */
+
+ /*
+ Step7: drop the old database.
+ remove_db_from_cache(olddb) and query_cache_invalidate(olddb)
+ are done inside mysql_rm_db(), no need to execute them again.
+ mysql_rm_db() also "unuses" the database if we drop the current one.
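Condensed, the numbered steps of mysql_rename_db() form the following outline (a restatement of the comments above, not additional behaviour):

    /* RENAME DATABASE olddb TO newdb, as implemented here:
       1. create newdb, reusing olddb's db.opt settings
       2. rename every olddb table into newdb (all or nothing)
       3. move the remaining files (triggers etc.) into newdb's directory
       4.-6. TODO: relocate stored routines, events and grants
       7. drop olddb
       8. write the statement to the binary log
       9. USE newdb if olddb was the current database */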
+ */ + error= mysql_rm_db(thd, old_db->str, 0, 1); + + /* Step8: logging */ + if (mysql_bin_log.is_open()) + { + Query_log_event qinfo(thd, thd->query, thd->query_length, 0, TRUE); + thd->clear_error(); + mysql_bin_log.write(&qinfo); + } + + /* Step9: Let's do "use newdb" if we renamed the current database */ + if (change_to_newdb) + error|= mysql_change_db(thd, new_db->str, 0); + +exit: + pthread_mutex_lock(&LOCK_lock_db); + /* Remove the databases from db lock cache */ + lock_db_delete(old_db->str, old_db->length); + lock_db_delete(new_db->str, new_db->length); + creating_database--; + /* Signal waiting CREATE TABLE's to continue */ + pthread_cond_signal(&COND_refresh); + pthread_mutex_unlock(&LOCK_lock_db); + + DBUG_RETURN(error); +} diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index d8a8f28b92b..34947e35b17 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -30,7 +30,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, ha_rows limit, ulonglong options, bool reset_auto_increment) { - int error; + bool will_batch; + int error, loc_error; TABLE *table; SQL_SELECT *select=0; READ_RECORD info; @@ -39,6 +40,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, ha_rows deleted; uint usable_index= MAX_KEY; SELECT_LEX *select_lex= &thd->lex->select_lex; + bool ha_delete_all_rows= 0; + ulonglong const saved_options= thd->options; DBUG_ENTER("mysql_delete"); if (open_and_lock_tables(thd, table_list)) @@ -49,7 +52,6 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, table_list->view_db.str, table_list->view_name.str); DBUG_RETURN(TRUE); } - table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); thd->proc_info="init"; table->map=1; @@ -76,10 +78,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, !(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) && !(table->triggers && table->triggers->has_delete_triggers())) { - deleted= table->file->records; + /* Update the table->file->records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + ha_rows const maybe_deleted= table->file->records; + /* + If all rows shall be deleted, we always log this statement-based + (see [binlog], below), so we set this flag and test it below. + */ + ha_delete_all_rows= 1; if (!(error=table->file->delete_all_rows())) { error= -1; // ok + deleted= maybe_deleted; goto cleanup; } if (error != HA_ERR_WRONG_COMMAND) @@ -91,6 +101,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, /* Handler didn't support fast delete; Delete rows one by one */ } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (prune_partitions(thd, table, conds)) + { + free_underlaid_joins(thd, select_lex); + thd->row_count_func= 0; + send_ok(thd); // No matching records + DBUG_RETURN(0); + } +#endif + /* Update the table->file->records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + table->used_keys.clear_all(); table->quick_keys.clear_all(); // Can't use 'only index' select=make_select(table, 0, 0, conds, 0, &error); @@ -102,13 +124,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, free_underlaid_joins(thd, select_lex); thd->row_count_func= 0; send_ok(thd,0L); - /* We don't need to call reset_auto_increment in this case, because mysql_truncate always gives a NULL conds argument, hence we never get here. 
*/ - DBUG_RETURN(0); // Nothing to delete } @@ -194,6 +214,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, deleted=0L; init_ftfuncs(thd, select_lex, 1); thd->proc_info="updating"; + will_batch= !table->file->start_bulk_delete(); + + /* + We saved the thread options above before clearing the + OPTION_BIN_LOG, and will restore below, effectively disabling the + binary log (unless it was already disabled, of course). + */ + if (ha_delete_all_rows) + thd->options&= ~static_cast<ulonglong>(OPTION_BIN_LOG); + while (!(error=info.read_record(&info)) && !thd->killed && !thd->net.report_error) { @@ -209,7 +239,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, break; } - if (!(error=table->file->delete_row(table->record[0]))) + if (!(error= table->file->ha_delete_row(table->record[0]))) { deleted++; if (table->triggers && @@ -245,7 +275,13 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, } if (thd->killed && !error) error= 1; // Aborted - thd->proc_info="end"; + if (will_batch && (loc_error= table->file->end_bulk_delete())) + { + if (error != 1) + table->file->print_error(loc_error,MYF(0)); + error=1; + } + thd->proc_info= "end"; end_read_record(&info); free_io_cache(table); // Will not do any harm if (options & OPTION_QUICK) @@ -278,6 +314,13 @@ cleanup: delete select; transactional_table= table->file->has_transactions(); + + /* + Restore the saved value of the OPTION_BIN_LOG bit in the thread + options before executing binlog_query() below. + */ + thd->options|= (saved_options & OPTION_BIN_LOG); + /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || (deleted && !transactional_table)) { @@ -285,10 +328,24 @@ cleanup: { if (error < 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_table, FALSE); - if (mysql_bin_log.write(&qinfo) && transactional_table) + + /* + [binlog]: If 'handler::delete_all_rows()' was called, we + replicate statement-based; otherwise, 'ha_delete_row()' was + used to delete specific rows which we might log row-based. + */ + THD::enum_binlog_query_type const + query_type(ha_delete_all_rows ? 
+ THD::STMT_QUERY_TYPE : + THD::ROW_QUERY_TYPE); + int log_result= thd->binlog_query(query_type, + thd->query, thd->query_length, + transactional_table, FALSE); + + if (log_result && transactional_table) + { error=1; + } } if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -584,7 +641,7 @@ bool multi_delete::send_data(List<Item> &values) TRG_ACTION_BEFORE, FALSE)) DBUG_RETURN(1); table->status|= STATUS_DELETED; - if (!(error=table->file->delete_row(table->record[0]))) + if (!(error=table->file->ha_delete_row(table->record[0]))) { deleted++; if (table->triggers && @@ -658,7 +715,8 @@ void multi_delete::send_error(uint errcode,const char *err) int multi_delete::do_deletes() { - int local_error= 0, counter= 0; + int local_error= 0, counter= 0, error; + bool will_batch; DBUG_ENTER("do_deletes"); DBUG_ASSERT(do_delete); @@ -686,6 +744,7 @@ int multi_delete::do_deletes() been deleted by foreign key handling */ info.ignore_not_found_rows= 1; + will_batch= !table->file->start_bulk_delete(); while (!(local_error=info.read_record(&info)) && !thd->killed) { if (table->triggers && @@ -695,7 +754,7 @@ int multi_delete::do_deletes() local_error= 1; break; } - if ((local_error=table->file->delete_row(table->record[0]))) + if ((local_error=table->file->ha_delete_row(table->record[0]))) { table->file->print_error(local_error,MYF(0)); break; @@ -709,6 +768,14 @@ int multi_delete::do_deletes() break; } } + if (will_batch && (error= table->file->end_bulk_delete())) + { + if (!local_error) + { + local_error= error; + table->file->print_error(local_error,MYF(0)); + } + } end_read_record(&info); if (thd->killed && !local_error) local_error= 1; @@ -754,10 +821,13 @@ bool multi_delete::send_eof() { if (local_error == 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_tables, FALSE); - if (mysql_bin_log.write(&qinfo) && !normal_tables) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_tables, FALSE) && + !normal_tables) + { local_error=1; // Log write failed: roll back the SQL statement + } } if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -796,29 +866,34 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) { HA_CREATE_INFO create_info; char path[FN_REFLEN]; - TABLE **table_ptr; + TABLE *table; bool error; + uint closed_log_tables= 0, lock_logger= 0; + TABLE_LIST *tmp_table_list; + uint path_length; DBUG_ENTER("mysql_truncate"); bzero((char*) &create_info,sizeof(create_info)); /* If it is a temporary table, close and regenerate it */ - if (!dont_send_ok && (table_ptr=find_temporary_table(thd,table_list->db, - table_list->table_name))) + if (!dont_send_ok && (table= find_temporary_table(thd, table_list))) { - TABLE *table= *table_ptr; - table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK); - db_type table_type= table->s->db_type; + handlerton *table_type= table->s->db_type; + TABLE_SHARE *share= table->s; if (!ha_check_storage_engine_flag(table_type, HTON_CAN_RECREATE)) goto trunc_by_del; - strmov(path, table->s->path); - *table_ptr= table->next; // Unlink table from list - close_temporary(table,0); - *fn_ext(path)=0; // Remove the .frm extension - ha_create_table(path, &create_info,1); + + table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK); + + close_temporary_table(thd, table, 0, 0); // Don't free share + ha_create_table(thd, share->normalized_path.str, + share->db.str, share->table_name.str, &create_info, 1); // We don't need to call invalidate() 
because this table is not in cache
- if ((error= (int) !(open_temporary_table(thd, path, table_list->db,
- table_list->table_name, 1))))
+ if ((error= (int) !(open_temporary_table(thd, share->path.str,
+ share->db.str,
+ share->table_name.str, 1))))
 (void) rm_temporary_table(table_type, path);
+ free_table_share(share);
+ my_free((char*) table,MYF(0));
 /*
 If we return here we will not have logged the truncation to the
 bin log and we will not send_ok() to the client.
 */
 goto end;
 }
 
- (void) sprintf(path,"%s/%s/%s%s",mysql_data_home,table_list->db,
- table_list->table_name,reg_ext);
- fn_format(path, path, "", "", MY_UNPACK_FILENAME);
+ path_length= build_table_filename(path, sizeof(path), table_list->db,
+ table_list->table_name, reg_ext);
 
 if (!dont_send_ok)
 {
- db_type table_type;
+ enum legacy_db_type table_type;
 mysql_frm_type(thd, path, &table_type);
 if (table_type == DB_TYPE_UNKNOWN)
 {
@@ -840,15 +914,43 @@
 table_list->db, table_list->table_name);
 DBUG_RETURN(TRUE);
 }
- if (!ha_check_storage_engine_flag(table_type, HTON_CAN_RECREATE)
+ if (!ha_check_storage_engine_flag(ha_resolve_by_legacy_type(thd, table_type),
+ HTON_CAN_RECREATE)
 || thd->lex->sphead)
 goto trunc_by_del;
+
 if (lock_and_wait_for_table_name(thd, table_list))
 DBUG_RETURN(TRUE);
 }
 
- *fn_ext(path)=0; // Remove the .frm extension
- error= ha_create_table(path,&create_info,1);
+ /* close log tables in use */
+ if (!my_strcasecmp(system_charset_info, table_list->db, "mysql"))
+ {
+ if (!my_strcasecmp(system_charset_info, table_list->table_name,
+ "general_log"))
+ {
+ lock_logger= 1;
+ logger.lock();
+ logger.close_log_table(QUERY_LOG_GENERAL, FALSE);
+ closed_log_tables= closed_log_tables | QUERY_LOG_GENERAL;
+ }
+ else
+ if (!my_strcasecmp(system_charset_info, table_list->table_name,
+ "slow_log"))
+ {
+ lock_logger= 1;
+ logger.lock();
+ logger.close_log_table(QUERY_LOG_SLOW, FALSE);
+ closed_log_tables= closed_log_tables | QUERY_LOG_SLOW;
+ }
+ }
+
+ // Remove the .frm extension. Due to an AIX 5.2 64-bit compiler bug
+ // (BUG#16155), *(path + path_length - reg_ext_length)= '\0' crashes;
+ // the replacement below works.
+ path[path_length - reg_ext_length] = 0;
+ error= ha_create_table(thd, path, table_list->db, table_list->table_name,
+ &create_info, 1);
 query_cache_invalidate3(thd, table_list, 0);
 
end:
@@ -858,16 +960,27 @@ end:
 {
 if (mysql_bin_log.is_open())
 {
+ /*
+ TRUNCATE must always be statement-based binlogged (not row-based) so
+ we don't test current_stmt_binlog_row_based.
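Because TRUNCATE of mysql.general_log or mysql.slow_log recreates a table the logger holds open, the code above closes the affected log table first and reopens it once the table exists again. The bracketing pattern, condensed from the code above and the cleanup further down (logger API names as introduced by this patch):

    /* Sketch: close the log table around the recreate, then reopen it. */
    logger.lock();
    logger.close_log_table(QUERY_LOG_GENERAL, FALSE);
    /* ... ha_create_table() recreates the now-empty table ... */
    logger.reopen_log_table(QUERY_LOG_GENERAL);
    logger.unlock();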
+ */ thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); // This should return record count } VOID(pthread_mutex_lock(&LOCK_open)); unlock_table_name(thd, table_list); VOID(pthread_mutex_unlock(&LOCK_open)); + + if (closed_log_tables & QUERY_LOG_SLOW) + logger.reopen_log_table(QUERY_LOG_SLOW); + + if (closed_log_tables & QUERY_LOG_GENERAL) + logger.reopen_log_table(QUERY_LOG_GENERAL); + if (lock_logger) + logger.unlock(); } else if (error) { @@ -877,7 +990,7 @@ end: } DBUG_RETURN(error); - trunc_by_del: +trunc_by_del: /* Probably InnoDB table */ ulong save_options= thd->options; table_list->lock_type= TL_WRITE; diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index e1817985cbd..5a9871c07c5 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -179,8 +179,8 @@ exit: } orig_table_list->derived_result= derived_result; orig_table_list->table= table; - orig_table_list->table_name= (char*) table->s->table_name; - orig_table_list->table_name_length= strlen((char*)table->s->table_name); + orig_table_list->table_name= table->s->table_name.str; + orig_table_list->table_name_length= table->s->table_name.length; table->derived_select_number= first_select->select_number; table->s->tmp_table= TMP_TABLE; #ifndef NO_EMBEDDED_ACCESS_CHECKS diff --git a/sql/sql_error.cc b/sql/sql_error.cc index 191a6e0a1fd..74df3b68a0d 100644 --- a/sql/sql_error.cc +++ b/sql/sql_error.cc @@ -211,8 +211,13 @@ void push_warning_printf(THD *thd, MYSQL_ERROR::enum_warning_level level, TRUE Error sending data to client */ -static const char *warning_level_names[]= {"Note", "Warning", "Error", "?"}; -static int warning_level_length[]= { 4, 7, 5, 1 }; +LEX_STRING warning_level_names[]= +{ + {(char*) STRING_WITH_LEN("Note")}, + {(char*) STRING_WITH_LEN("Warning")}, + {(char*) STRING_WITH_LEN("Error")}, + {(char*) STRING_WITH_LEN("?")} +}; bool mysqld_show_warnings(THD *thd, ulong levels_to_show) { @@ -246,8 +251,8 @@ bool mysqld_show_warnings(THD *thd, ulong levels_to_show) if (idx > unit->select_limit_cnt) break; protocol->prepare_for_resend(); - protocol->store(warning_level_names[err->level], - warning_level_length[err->level], system_charset_info); + protocol->store(warning_level_names[err->level].str, + warning_level_names[err->level].length, system_charset_info); protocol->store((uint32) err->code); protocol->store(err->msg, strlen(err->msg), system_charset_info); if (protocol->write()) diff --git a/sql/sql_handler.cc b/sql/sql_handler.cc index 1cd7778a053..9dfa20da522 100644 --- a/sql/sql_handler.cc +++ b/sql/sql_handler.cc @@ -403,7 +403,8 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, #if MYSQL_VERSION_ID < 40100 char buff[MAX_DBKEY_LENGTH]; if (*tables->db) - strxnmov(buff, sizeof(buff), tables->db, ".", tables->table_name, NullS); + strxnmov(buff, sizeof(buff)-1, tables->db, ".", tables->table_name, + NullS); else strncpy(buff, tables->alias, sizeof(buff)); my_error(ER_UNKNOWN_TABLE, MYF(0), buff, "HANDLER"); @@ -469,7 +470,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, if (keyname) { table->file->ha_index_or_rnd_end(); - table->file->ha_index_init(keyno); + table->file->ha_index_init(keyno, 1); error= table->file->index_first(table->record[0]); } else @@ -491,7 +492,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables, case RLAST: DBUG_ASSERT(keyname != 0); table->file->ha_index_or_rnd_end(); - 
table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
 error= table->file->index_last(table->record[0]);
 mode=RPREV;
 break;
@@ -530,7 +531,7 @@ bool mysql_ha_read(THD *thd, TABLE_LIST *tables,
 if (!(key= (byte*) thd->calloc(ALIGN_SIZE(key_len))))
 goto err;
 table->file->ha_index_or_rnd_end();
- table->file->ha_index_init(keyno);
+ table->file->ha_index_init(keyno, 1);
 key_copy(key, table->record[0], table->key_info + keyno, key_len);
 error= table->file->index_read(table->record[0],
 key,key_len,ha_rkey_mode);
@@ -640,14 +641,15 @@ int mysql_ha_flush(THD *thd, TABLE_LIST *tables, uint mode_flags,
 while (*table_ptr)
 {
 if ((!*tmp_tables->db ||
- !my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->db,
+ !my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->db.str,
 tmp_tables->db)) &&
- ! my_strcasecmp(&my_charset_latin1, (*table_ptr)->s->table_name,
+ ! my_strcasecmp(&my_charset_latin1,
+ (*table_ptr)->s->table_name.str,
 tmp_tables->table_name))
 {
 DBUG_PRINT("info",("*table_ptr '%s'.'%s' as '%s'",
- (*table_ptr)->s->db,
- (*table_ptr)->s->table_name,
+ (*table_ptr)->s->db.str,
+ (*table_ptr)->s->table_name.str,
 (*table_ptr)->alias));
 /* The first time it is required, lock for close_thread_table(). */
 if (! did_lock && ! is_locked)
@@ -717,7 +719,7 @@ static int mysql_ha_flush_table(THD *thd, TABLE **table_ptr, uint mode_flags)
 TABLE *table= *table_ptr;
 DBUG_ENTER("mysql_ha_flush_table");
 DBUG_PRINT("enter",("'%s'.'%s' as '%s' flags: 0x%02x",
- table->s->db, table->s->table_name,
+ table->s->db.str, table->s->table_name.str,
 table->alias, mode_flags));
 
 if ((hash_tables= (TABLE_LIST*) hash_search(&thd->handler_tables_hash,
diff --git a/sql/sql_help.cc b/sql/sql_help.cc
index d6d1a6ed119..ea9bca57cc6 100644
--- a/sql/sql_help.cc
+++ b/sql/sql_help.cc
@@ -286,8 +286,8 @@ int get_topics_for_keyword(THD *thd, TABLE *topics, TABLE *relations,
 rtopic_id= find_fields[help_relation_help_topic_id].field;
 rkey_id= find_fields[help_relation_help_keyword_id].field;
 
- topics->file->ha_index_init(iindex_topic);
- relations->file->ha_index_init(iindex_relations);
+ topics->file->ha_index_init(iindex_topic,1);
+ relations->file->ha_index_init(iindex_relations,1);
 
 rkey_id->store((longlong) key_id, TRUE);
 rkey_id->get_key_image(buff, rkey_id->pack_length(), Field::itRAW);
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index eb74144c1ea..bd7be110b88 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -21,6 +21,7 @@
 #include "sp_head.h"
 #include "sql_trigger.h"
 #include "sql_select.h"
+#include "sql_show.h"
 
 static int check_null_fields(THD *thd,TABLE *entry);
 #ifndef EMBEDDED_LIBRARY
@@ -96,13 +97,18 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
 Field_iterator_table fields;
 fields.set_table(table);
 if (check_grant_all_columns(thd, INSERT_ACL, &table->grant,
- table->s->db, table->s->table_name,
+ table->s->db.str, table->s->table_name.str,
 &fields))
 return -1;
 }
 #endif
 clear_timestamp_auto_bits(table->timestamp_field_type,
 TIMESTAMP_AUTO_SET_ON_INSERT);
+ /*
+ No fields are provided, so all fields must be provided in the values.
+ Thus we set all bits in the write set.
+ */
+ table->file->ha_set_all_bits_in_write_set();
 }
 else
 { // Part field list
@@ -129,7 +135,11 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
 */
 table_list->next_local= 0;
 context->resolve_in_table_list_only(table_list);
- res= setup_fields(thd, 0, fields, 1, 0, 0);
+ /*
+ Indicate that the fields in the list are to be updated by setting the
+ set_query_id parameter to 2.
This sets the bit in the write_set for each field.
+ */
+ res= setup_fields(thd, 0, fields, 2, 0, 0);
 
 /* Restore the current context. */
 ctx_state.restore_state(context, table_list);
@@ -222,9 +232,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
 
 /*
 Check the fields we are going to modify. This will set the query_id
- of all used fields to the threads query_id.
+ of all used fields to the thread's query_id. It will also set all
+ fields in the write set of this table.
 */
- if (setup_fields(thd, 0, update_fields, 1, 0, 0))
+ if (setup_fields(thd, 0, update_fields, 2, 0, 0))
 return -1;
 
 if (table->timestamp_field)
@@ -234,7 +245,10 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
 clear_timestamp_auto_bits(table->timestamp_field_type,
 TIMESTAMP_AUTO_SET_ON_UPDATE);
 else
+ {
 table->timestamp_field->query_id= timestamp_query_id;
+ table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr);
+ }
 }
 
 return 0;
@@ -563,10 +577,13 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
 {
 if (error <= 0)
 thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_table, FALSE);
- if (mysql_bin_log.write(&qinfo) && transactional_table)
- error=1;
+ if (thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ transactional_table, FALSE) &&
+ transactional_table)
+ {
+ error=1;
+ }
 }
 if (!transactional_table)
 thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
@@ -659,8 +676,8 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view)
 *trans_end= trans_start + num;
 Field_translator *trans;
 Field **field_ptr= table->field;
- uint used_fields_buff_size= (table->s->fields + 7) / 8;
- uchar *used_fields_buff= (uchar*)thd->alloc(used_fields_buff_size);
+ uint used_fields_buff_size= bitmap_buffer_size(table->s->fields);
+ uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size);
 MY_BITMAP used_fields;
 DBUG_ENTER("check_key_in_view");
 
@@ -669,8 +686,7 @@ static bool check_view_insertability(THD * thd, TABLE_LIST *view)
 
 DBUG_ASSERT(view->table != 0 && view->field_translation != 0);
 
- VOID(bitmap_init(&used_fields, used_fields_buff, used_fields_buff_size * 8,
- 0));
+ VOID(bitmap_init(&used_fields, used_fields_buff, table->s->fields, 0));
 bitmap_clear_all(&used_fields);
 
 view->contain_auto_increment= 0;
@@ -883,7 +899,7 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
 select_lex->first_execution= 0;
 }
 if (duplic == DUP_UPDATE || duplic == DUP_REPLACE)
- table->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY);
+ table->file->ha_retrieve_all_pk();
 DBUG_RETURN(FALSE);
 }
 
@@ -933,10 +949,11 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 DBUG_ENTER("write_record");
 
 info->records++;
+
 if (info->handle_duplicates == DUP_REPLACE ||
 info->handle_duplicates == DUP_UPDATE)
 {
- while ((error=table->file->write_row(table->record[0])))
+ while ((error=table->file->ha_write_row(table->record[0])))
 {
 uint key_nr;
 if (error != HA_WRITE_SKIP)
@@ -1020,7 +1037,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 thd->clear_next_insert_id= 0;
 thd->next_insert_id= 0;
 }
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],table->record[0])))
 {
 if ((error == HA_ERR_FOUND_DUPP_KEY) && info->ignore)
 goto ok_or_after_trg_err;
@@ -1059,8 +1076,8 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
 thd->clear_next_insert_id= 0;
 thd->next_insert_id= 0;
 }
- if
((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) goto err; info->deleted++; trg_error= (table->triggers && @@ -1077,7 +1094,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, TRUE)) goto before_trg_err; - if ((error=table->file->delete_row(table->record[1]))) + if ((error=table->file->ha_delete_row(table->record[1]))) goto err; info->deleted++; if (!table->file->has_transactions()) @@ -1098,7 +1115,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_INSERT, TRG_ACTION_AFTER, TRUE)); } - else if ((error=table->file->write_row(table->record[0]))) + else if ((error=table->file->ha_write_row(table->record[0]))) { if (!info->ignore || (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE)) @@ -1184,16 +1201,15 @@ int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, class delayed_row :public ilink { public: - char *record,*query; + char *record; enum_duplicates dup; time_t start_time; bool query_start_used,last_insert_id_used,insert_id_used, ignore, log_query; ulonglong last_insert_id; timestamp_auto_set_type timestamp_field_type; - uint query_length; delayed_row(enum_duplicates dup_arg, bool ignore_arg, bool log_query_arg) - :record(0), query(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {} + :record(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {} ~delayed_row() { x_free(record); @@ -1203,6 +1219,9 @@ public: class delayed_insert :public ilink { uint locks_in_memory; + char *query; + ulong query_length; + ulong query_allocated; public: THD thd; TABLE *table; @@ -1216,7 +1235,7 @@ public: TABLE_LIST table_list; // Argument delayed_insert() - :locks_in_memory(0), + :locks_in_memory(0), query(0), query_length(0), query_allocated(0), table(0),tables_in_use(0),stacked_inserts(0), status(0), dead(0), group_count(0) { @@ -1242,6 +1261,7 @@ public: } ~delayed_insert() { + my_free(query, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); /* The following is not really needed, but just for safety */ delayed_row *row; while ((row=rows.get())) @@ -1261,6 +1281,25 @@ public: VOID(pthread_cond_broadcast(&COND_thread_count)); /* Tell main we are ready */ } + int set_query(char const *q, ulong qlen) { + if (q && qlen > 0) + { + if (query_allocated < qlen + 1) + { + ulong const flags(MY_WME|MY_FREE_ON_ERROR|MY_ALLOW_ZERO_PTR); + query= my_realloc(query, qlen + 1, MYF(flags)); + if (query == 0) + return HA_ERR_OUT_OF_MEM; + query_allocated= qlen; + } + query_length= qlen; + memcpy(query, q, qlen + 1); + } + else + query_length= 0; + return 0; + } + /* The following is for checking when we can delete ourselves */ inline void lock() { @@ -1299,8 +1338,8 @@ delayed_insert *find_handler(THD *thd, TABLE_LIST *table_list) delayed_insert *tmp; while ((tmp=it++)) { - if (!strcmp(tmp->thd.db,table_list->db) && - !strcmp(table_list->table_name,tmp->table->s->table_name)) + if (!strcmp(tmp->thd.db, table_list->db) && + !strcmp(table_list->table_name, tmp->table->s->table_name.str)) { tmp->lock(); break; @@ -1453,6 +1492,7 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) my_ptrdiff_t adjust_ptrs; Field **field,**org_field, *found_next_number_field; TABLE *copy; + TABLE_SHARE *share= table->s; /* First request insert thread to get a lock */ status=1; @@ -1478,19 +1518,16 @@ TABLE *delayed_insert::get_local_table(THD* 
client_thd) client_thd->proc_info="allocating local table"; copy= (TABLE*) client_thd->alloc(sizeof(*copy)+ - (table->s->fields+1)*sizeof(Field**)+ - table->s->reclength); + (share->fields+1)*sizeof(Field**)+ + share->reclength); if (!copy) goto error; *copy= *table; - copy->s= ©->share_not_to_be_used; - // No name hashing - bzero((char*) ©->s->name_hash,sizeof(copy->s->name_hash)); - /* We don't need to change the file handler here */ + /* We don't need to change the file handler here */ field=copy->field=(Field**) (copy+1); - copy->record[0]=(byte*) (field+table->s->fields+1); - memcpy((char*) copy->record[0],(char*) table->record[0],table->s->reclength); + copy->record[0]=(byte*) (field+share->fields+1); + memcpy((char*) copy->record[0],(char*) table->record[0],share->reclength); /* Make a copy of all fields */ @@ -1502,7 +1539,7 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) if (!(*field= (*org_field)->new_field(client_thd->mem_root,copy))) return 0; (*field)->orig_table= copy; // Remove connection - (*field)->move_field(adjust_ptrs); // Point at copy->record[0] + (*field)->move_field_offset(adjust_ptrs); // Point at copy->record[0] if (*org_field == found_next_number_field) (*field)->table->found_next_number_field= *field; } @@ -1513,13 +1550,11 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) { /* Restore offset as this may have been reset in handle_inserts */ copy->timestamp_field= - (Field_timestamp*) copy->field[table->s->timestamp_field_offset]; + (Field_timestamp*) copy->field[share->timestamp_field_offset]; copy->timestamp_field->unireg_check= table->timestamp_field->unireg_check; copy->timestamp_field_type= copy->timestamp_field->get_auto_set_type(); } - /* _rowid is not used with delayed insert */ - copy->rowid_field=0; /* Adjust in_use for pointing to client thread */ copy->in_use= client_thd; @@ -1537,8 +1572,9 @@ TABLE *delayed_insert::get_local_table(THD* client_thd) /* Put a question in queue */ -static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, bool ignore, - char *query, uint query_length, bool log_on) +static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, + bool ignore, char *query, uint query_length, + bool log_on) { delayed_row *row=0; delayed_insert *di=thd->di; @@ -1553,18 +1589,10 @@ static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic, bool igno if (thd->killed || !(row= new delayed_row(duplic, ignore, log_on))) goto err; - if (!query) - query_length=0; - if (!(row->record= (char*) my_malloc(table->s->reclength+query_length+1, - MYF(MY_WME)))) + if (!(row->record= (char*) my_malloc(table->s->reclength, MYF(MY_WME)))) goto err; memcpy(row->record, table->record[0], table->s->reclength); - if (query_length) - { - row->query= row->record+table->s->reclength; - memcpy(row->query,query,query_length+1); - } - row->query_length= query_length; + di->set_query(query, query_length); row->start_time= thd->start_time; row->query_start_used= thd->query_start_used; row->last_insert_id_used= thd->last_insert_id_used; @@ -1888,7 +1916,9 @@ bool delayed_insert::handle_inserts(void) { int error; ulong max_rows; - bool using_ignore=0, using_bin_log=mysql_bin_log.is_open(); + bool using_ignore=0, + using_bin_log= mysql_bin_log.is_open(); + delayed_row *row; DBUG_ENTER("handle_inserts"); @@ -1901,7 +1931,7 @@ bool delayed_insert::handle_inserts(void) if (thr_upgrade_write_delay_lock(*thd.lock->locks)) { /* This can only happen if thread is killed by shutdown */ - 
sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name); + sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name.str); goto err; } @@ -1954,11 +1984,6 @@ bool delayed_insert::handle_inserts(void) using_ignore=0; table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); } - if (row->query && row->log_query && using_bin_log) - { - Query_log_event qinfo(&thd, row->query, row->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); - } if (table->s->blob_fields) free_delayed_insert_blobs(table); thread_safe_sub(delayed_rows_in_use,1,&LOCK_delayed_status); @@ -1973,8 +1998,7 @@ bool delayed_insert::handle_inserts(void) on this table until all entries has been processed */ if (group_count++ >= max_rows && (row= rows.head()) && - (!(row->log_query & using_bin_log) || - row->query)) + (!(row->log_query & using_bin_log))) { group_count=0; if (stacked_inserts || tables_in_use) // Let these wait a while @@ -1994,7 +2018,8 @@ bool delayed_insert::handle_inserts(void) if (thr_reschedule_write_lock(*thd.lock->locks)) { /* This should never happen */ - sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK),table->s->table_name); + sql_print_error(ER(ER_DELAYED_CANT_CHANGE_LOCK), + table->s->table_name.str); } if (!using_bin_log) table->file->extra(HA_EXTRA_WRITE_CACHE); @@ -2009,6 +2034,11 @@ bool delayed_insert::handle_inserts(void) thd.proc_info=0; table->next_number_field=0; pthread_mutex_unlock(&mutex); + + /* After releasing the mutex, to prevent deadlocks. */ + if (mysql_bin_log.is_open()) + thd.binlog_query(THD::ROW_QUERY_TYPE, query, query_length, FALSE, FALSE); + if ((error=table->file->extra(HA_EXTRA_NO_CACHE))) { // This shouldn't happen table->file->print_error(error,MYF(0)); @@ -2206,6 +2236,16 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u) check_that_all_fields_are_given_values(thd, table, table_list)) || table_list->prepare_where(thd, 0, TRUE) || table_list->prepare_check_option(thd)); + + /* + For non-transactional non-temporary tables, we set the + OPTION_STATUS_NO_TRANS_UPDATE flag here. The send_eof() function + is used by both the select_insert and the select_create classes, + so setting it there would clash. + */ + if (!(table->file->has_transactions() || table->s->tmp_table)) + thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; + DBUG_RETURN(res); } @@ -2247,7 +2287,7 @@ select_insert::~select_insert() if (table) { table->next_number_field=0; - table->file->reset(); + table->file->ha_reset(); } thd->count_cuted_fields= CHECK_FIELD_IGNORE; thd->abort_on_warning= 0; @@ -2335,9 +2375,31 @@ void select_insert::send_error(uint errcode,const char *err) table->file->end_bulk_insert(); /* If at least one row has been inserted/modified and will stay in the table - (the table doesn't have transactions) (example: we got a duplicate key - error while inserting into a MyISAM table) we must write to the binlog (and + (the table doesn't have transactions) we must write to the binlog (and the error code will make the slave stop). + + For many errors (example: we got a duplicate key error while + inserting into a MyISAM table), no row will be added to the table, + so passing the error to the slave will not help since there will + be an error code mismatch (the inserts will succeed on the slave + with no error). + + If we are using row-based replication we have two cases where this + code is executed: replication of CREATE-SELECT and replication of + INSERT-SELECT. + + When replicating a CREATE-SELECT statement, we shall not write the + events to the binary log. 
To prevent the ha_rollback_stmt() below + from writing to the binary log, we have to pretend that the table + is transactional, even if it actually is not. Therefore, the + OPTION_STATUS_NO_TRANS_UPDATE is cleared in + select_create::prepare() and will remain cleared here. + + When replicating INSERT-SELECT, we shall not write the events to + the binary log for transactional tables, but shall write all events + if there are one or more writes to non-transactional tables. In + this case, the OPTION_STATUS_NO_TRANS_UPDATE is set if there is a + write to a non-transactional table, otherwise it is cleared. */ if ((info.copied || info.deleted || info.updated) && !table->file->has_transactions()) @@ -2346,11 +2408,10 @@ void select_insert::send_error(uint errcode,const char *err) thd->insert_id(last_insert_id); // For binary log if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, - table->file->has_transactions(), FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length, + table->file->has_transactions(), FALSE); } - if (!table->s->tmp_table) + if (!thd->current_stmt_binlog_row_based && !table->s->tmp_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } if (info.copied || info.deleted || info.updated) @@ -2372,26 +2433,36 @@ bool select_insert::send_eof() /* We must invalidate the table in the query cache before binlog writing - and ha_autocommit_or_rollback - */ + and ha_autocommit_or_rollback. + + If nothing was inserted in the table, there is no need to emit a + ROLLBACK statement to the binary log, so in that case we clear + OPTION_STATUS_NO_TRANS_UPDATE. + Observe that select_insert::send_eof() is used by both + select_insert and select_create and that they set the flag in + different manners. See Note 1 below for more info. + */ if (info.copied || info.deleted || info.updated) - { query_cache_invalidate3(thd, table, 1); - if (!(table->file->has_transactions() || table->s->tmp_table)) - thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; - } + else + thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE; if (last_insert_id) thd->insert_id(last_insert_id); // For binary log - /* Write to binlog before commiting transaction */ + /* + Write to binlog before committing the transaction. No statement will + be written by the binlog_query() below in RBR mode. All the + events are in the transaction cache and will be written when + ha_autocommit_or_rollback() is issued below. + */ if (mysql_bin_log.is_open()) { if (!error) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - table->file->has_transactions(), FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + table->file->has_transactions(), FALSE); } if ((error2=ha_autocommit_or_rollback(thd,error)) && ! error) error=error2; @@ -2461,8 +2532,62 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u) } +void +select_create::binlog_show_create_table() +{ + /* + Note 1: In RBR mode, we generate a CREATE TABLE statement for the + created table by calling store_create_info() (behaves as SHOW + CREATE TABLE). In the event of an error, nothing should be + written to the binary log, even if the table is non-transactional; + therefore we pretend that the generated CREATE TABLE statement is + for a transactional table. The event will then be put in the + transaction cache, and any subsequent events (e.g., table-map + events and binrow events) will also be put there.
We can then use + ha_autocommit_or_rollback() to either throw away the entire + kaboodle of events, or write them to the binary log. + + We write the CREATE TABLE statement here and not in prepare() + since there potentially are sub-selects or accesses to information + schema that will do a close_thread_tables(), destroying the + statement transaction cache. + + To ensure that the event kaboodle is not written to the binary log + on rollback, we clear the OPTION_STATUS_NO_TRANS_UPDATE bit of + thd->options. + */ + DBUG_ASSERT(thd->current_stmt_binlog_row_based && !create_table_written); + + thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE; + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + + TABLE_LIST tables; + memset(&tables, 0, sizeof(tables)); + tables.table = table; + + int result= store_create_info(thd, &tables, &query, create_info); + DBUG_ASSERT(result == 0); /* store_create_info() always returns 0 */ + thd->binlog_query(THD::STMT_QUERY_TYPE, + query.ptr(), query.length(), + /* is_trans */ TRUE, + /* suppress_use */ FALSE); +} + + void select_create::store_values(List<Item> &values) { + /* + Before writing the first row, we write the CREATE TABLE statement + to the binlog. + */ + if (thd->current_stmt_binlog_row_based && !create_table_written) + { + binlog_show_create_table(); + create_table_written= TRUE; + } + fill_record_n_invoke_before_triggers(thd, field, values, 1, table->triggers, TRG_EVENT_INSERT); } @@ -2482,6 +2607,16 @@ void select_create::send_error(uint errcode,const char *err) bool select_create::send_eof() { + /* + If no rows were written to the binary log, we write the CREATE + TABLE statement to the binlog. + */ + if (thd->current_stmt_binlog_row_based && !create_table_written) + { + binlog_show_create_table(); + create_table_written= TRUE; + } + bool tmp=select_insert::send_eof(); if (tmp) abort(); @@ -2521,10 +2656,11 @@ void select_create::abort() if (table) { table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); - enum db_type table_type=table->s->db_type; + handlerton *table_type=table->s->db_type; if (!table->s->tmp_table) { ulong version= table->s->version; + table->s->version= 0; hash_delete(&open_cache,(byte*) table); if (!create_info->table_existed) quick_rm_table(table_type, create_table->db, create_table->table_name); @@ -2533,8 +2669,8 @@ void select_create::abort() VOID(pthread_cond_broadcast(&COND_refresh)); } else if (!create_info->table_existed) - close_temporary_table(thd, create_table->db, create_table->table_name); - table=0; + close_temporary_table(thd, table, 1, 1); + table=0; // Safety } VOID(pthread_mutex_unlock(&LOCK_open)); } diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 1ed8887a878..20c71ae738b 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -28,7 +28,8 @@ We are using pointer to this variable for distinguishing between assignment to NEW row field (when parsing trigger definition) and structured variable.
*/ -sys_var_long_ptr trg_new_row_fake_var(0, 0); + +sys_var *trg_new_row_fake_var= (sys_var*) 0x01; /* Macros to look like lex */ @@ -109,7 +110,7 @@ void lex_free(void) (We already do too much here) */ -void lex_start(THD *thd, uchar *buf,uint length) +void lex_start(THD *thd, const uchar *buf, uint length) { LEX *lex= thd->lex; DBUG_ENTER("lex_start"); @@ -159,6 +160,7 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->yylineno = 1; lex->in_comment=0; lex->length=0; + lex->part_info= 0; lex->select_lex.in_sum_expr=0; lex->select_lex.expr_list.empty(); lex->select_lex.ftfunc_list_alloc.empty(); @@ -175,7 +177,10 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->spcont= NULL; lex->proc_list.first= 0; lex->query_tables_own_last= 0; - lex->escape_used= FALSE; + lex->escape_used= lex->et_compile_phase= FALSE; + + lex->name= 0; + lex->et= NULL; if (lex->sroutines.records) my_hash_reset(&lex->sroutines); @@ -197,9 +202,9 @@ void lex_end(LEX *lex) static int find_keyword(LEX *lex, uint len, bool function) { - uchar *tok=lex->tok_start; + const uchar *tok=lex->tok_start; - SYMBOL *symbol = get_hash_symbol((const char *)tok,len,function); + SYMBOL *symbol= get_hash_symbol((const char *)tok,len,function); if (symbol) { lex->yylval->symbol.symbol=symbol; @@ -257,15 +262,16 @@ static LEX_STRING get_token(LEX *lex,uint length) static LEX_STRING get_quoted_token(LEX *lex,uint length, char quote) { LEX_STRING tmp; - byte *from, *to, *end; + const uchar *from, *end; + uchar *to; yyUnget(); // ptr points now after last token char tmp.length=lex->yytoklen=length; tmp.str=(char*) lex->thd->alloc(tmp.length+1); - for (from= (byte*) lex->tok_start, to= (byte*) tmp.str, end= to+length ; + for (from= lex->tok_start, to= (uchar*) tmp.str, end= to+length ; to != end ; ) { - if ((*to++= *from++) == quote) + if ((*to++= *from++) == (uchar) quote) from++; // Skip double quotes } *to= 0; // End null for safety @@ -285,7 +291,6 @@ static char *get_text(LEX *lex) CHARSET_INFO *cs= lex->thd->charset(); sep= yyGetLast(); // String should end with this - //lex->tok_start=lex->ptr-1; // Remember ' while (lex->ptr != lex->end_of_query) { c = yyGet(); @@ -329,7 +334,8 @@ static char *get_text(LEX *lex) yyUnget(); /* Found end. 
Unescape and return string */ - uchar *str,*end,*start; + const uchar *str, *end; + uchar *start; str=lex->tok_start+1; end=lex->ptr-1; @@ -613,7 +619,7 @@ int yylex(void *arg, void *yythd) break; } case MY_LEX_IDENT: - uchar *start; + const uchar *start; #if defined(USE_MB) && defined(USE_MB_IDENT) if (use_mb(cs)) { diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 7b2ea359fb2..08167f8b6d2 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -25,7 +25,11 @@ class sp_head; class sp_name; class sp_instr; class sp_pcontext; +class st_alter_tablespace; +class partition_info; +class Event_timed; +#ifdef MYSQL_SERVER /* The following hack is needed because mysql_yacc.cc does not define YYSTYPE before including this file @@ -40,6 +44,7 @@ class sp_pcontext; #include "sql_yacc.h" #define LEX_YYSTYPE YYSTYPE * #endif +#endif /* When a command is added here, be sure it's also added in mysqld.cc @@ -52,8 +57,8 @@ enum enum_sql_command { SQLCOM_DELETE, SQLCOM_TRUNCATE, SQLCOM_DROP_TABLE, SQLCOM_DROP_INDEX, SQLCOM_SHOW_DATABASES, SQLCOM_SHOW_TABLES, SQLCOM_SHOW_FIELDS, - SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_LOGS, SQLCOM_SHOW_STATUS, - SQLCOM_SHOW_INNODB_STATUS, SQLCOM_SHOW_NDBCLUSTER_STATUS, SQLCOM_SHOW_MUTEX_STATUS, + SQLCOM_SHOW_KEYS, SQLCOM_SHOW_VARIABLES, SQLCOM_SHOW_STATUS, + SQLCOM_SHOW_ENGINE_LOGS, SQLCOM_SHOW_ENGINE_STATUS, SQLCOM_SHOW_ENGINE_MUTEX, SQLCOM_SHOW_PROCESSLIST, SQLCOM_SHOW_MASTER_STAT, SQLCOM_SHOW_SLAVE_STAT, SQLCOM_SHOW_GRANTS, SQLCOM_SHOW_CREATE, SQLCOM_SHOW_CHARSETS, SQLCOM_SHOW_COLLATIONS, SQLCOM_SHOW_CREATE_DB, SQLCOM_SHOW_TABLE_STATUS, @@ -62,6 +67,7 @@ enum enum_sql_command { SQLCOM_LOAD,SQLCOM_SET_OPTION,SQLCOM_LOCK_TABLES,SQLCOM_UNLOCK_TABLES, SQLCOM_GRANT, SQLCOM_CHANGE_DB, SQLCOM_CREATE_DB, SQLCOM_DROP_DB, SQLCOM_ALTER_DB, + SQLCOM_RENAME_DB, SQLCOM_REPAIR, SQLCOM_REPLACE, SQLCOM_REPLACE_SELECT, SQLCOM_CREATE_FUNCTION, SQLCOM_DROP_FUNCTION, SQLCOM_REVOKE,SQLCOM_OPTIMIZE, SQLCOM_CHECK, @@ -91,6 +97,13 @@ enum enum_sql_command { SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE, + SQLCOM_ALTER_TABLESPACE, + SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, + SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT, + SQLCOM_SHOW_PLUGINS, + SQLCOM_CREATE_EVENT, SQLCOM_ALTER_EVENT, SQLCOM_DROP_EVENT, + SQLCOM_SHOW_CREATE_EVENT, SQLCOM_SHOW_EVENTS, + /* This should be the last !!! */ SQLCOM_END @@ -99,6 +112,13 @@ enum enum_sql_command { // describe/explain types #define DESCRIBE_NORMAL 1 #define DESCRIBE_EXTENDED 2 +/* + This is not within #ifdef because we want "EXPLAIN PARTITIONS ..." to produce + additional "partitions" column even if partitioning is not compiled in. 
+*/ +#define DESCRIBE_PARTITIONS 4 + +#ifdef MYSQL_SERVER enum enum_sp_suid_behaviour { @@ -456,7 +476,7 @@ public: void set_limit(st_select_lex *values); void set_thd(THD *thd_arg) { thd= thd_arg; } - friend void lex_start(THD *thd, uchar *buf, uint length); + friend void lex_start(THD *thd, const uchar *buf, uint length); friend int subselect_union_engine::exec(); List<Item> *get_unit_column_types(); @@ -626,7 +646,7 @@ public: void cut_subtree() { slave= 0; } bool test_limit(); - friend void lex_start(THD *thd, uchar *buf, uint length); + friend void lex_start(THD *thd, const uchar *buf, uint length); st_select_lex() {} void make_empty_select() { @@ -651,18 +671,31 @@ public: }; typedef class st_select_lex SELECT_LEX; -#define ALTER_ADD_COLUMN 1 -#define ALTER_DROP_COLUMN 2 -#define ALTER_CHANGE_COLUMN 4 -#define ALTER_ADD_INDEX 8 -#define ALTER_DROP_INDEX 16 -#define ALTER_RENAME 32 -#define ALTER_ORDER 64 -#define ALTER_OPTIONS 128 -#define ALTER_CHANGE_COLUMN_DEFAULT 256 -#define ALTER_KEYS_ONOFF 512 -#define ALTER_CONVERT 1024 -#define ALTER_FORCE 2048 +#define ALTER_ADD_COLUMN (1L << 0) +#define ALTER_DROP_COLUMN (1L << 1) +#define ALTER_CHANGE_COLUMN (1L << 2) +#define ALTER_ADD_INDEX (1L << 3) +#define ALTER_DROP_INDEX (1L << 4) +#define ALTER_RENAME (1L << 5) +#define ALTER_ORDER (1L << 6) +#define ALTER_OPTIONS (1L << 7) +#define ALTER_CHANGE_COLUMN_DEFAULT (1L << 8) +#define ALTER_KEYS_ONOFF (1L << 9) +#define ALTER_CONVERT (1L << 10) +#define ALTER_FORCE (1L << 11) +#define ALTER_RECREATE (1L << 12) +#define ALTER_ADD_PARTITION (1L << 13) +#define ALTER_DROP_PARTITION (1L << 14) +#define ALTER_COALESCE_PARTITION (1L << 15) +#define ALTER_REORGANIZE_PARTITION (1L << 16) +#define ALTER_PARTITION (1L << 17) +#define ALTER_OPTIMIZE_PARTITION (1L << 18) +#define ALTER_TABLE_REORG (1L << 19) +#define ALTER_REBUILD_PARTITION (1L << 20) +#define ALTER_ALL_PARTITION (1L << 21) +#define ALTER_ANALYZE_PARTITION (1L << 22) +#define ALTER_CHECK_PARTITION (1L << 23) +#define ALTER_REPAIR_PARTITION (1L << 24) typedef struct st_alter_info { @@ -671,9 +704,17 @@ typedef struct st_alter_info uint flags; enum enum_enable_or_disable keys_onoff; enum tablespace_op_type tablespace_op; + List<char> partition_names; + uint no_parts; st_alter_info(){clear();} - void clear(){keys_onoff= LEAVE_AS_IS;tablespace_op= NO_TABLESPACE_OP;} + void clear() + { + keys_onoff= LEAVE_AS_IS; + tablespace_op= NO_TABLESPACE_OP; + no_parts= 0; + partition_names.empty(); + } void reset(){drop_list.empty();alter_list.empty();clear();} } ALTER_INFO; @@ -692,7 +733,7 @@ struct st_trg_chistics enum trg_event_type event; }; -extern sys_var_long_ptr trg_new_row_fake_var; +extern sys_var *trg_new_row_fake_var; enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, XA_SUSPEND, XA_FOR_MIGRATE}; @@ -709,11 +750,11 @@ typedef struct st_lex SELECT_LEX *current_select; /* list of all SELECT_LEX */ SELECT_LEX *all_selects_list; - uchar *buf; /* The beginning of string, used by SPs */ - uchar *ptr,*tok_start,*tok_end,*end_of_query; + const uchar *buf; /* The beginning of string, used by SPs */ + const uchar *ptr,*tok_start,*tok_end,*end_of_query; /* The values of tok_start/tok_end as they were one call of yylex before */ - uchar *tok_start_prev, *tok_end_prev; + const uchar *tok_start_prev, *tok_end_prev; char *length,*dec,*change,*name; char *help_arg; @@ -743,6 +784,8 @@ typedef struct st_lex TABLE_LIST *leaf_tables_insert; /* Position (first character index) of SELECT of CREATE VIEW statement */ uint 
create_view_select_start; + /* Partition info structure filled in by PARTITION BY parse part */ + partition_info *part_info; /* The definer of the object being created (view, trigger, stored routine). @@ -776,6 +819,7 @@ typedef struct st_lex required a local context, the parser pops the top-most context. */ List<Name_resolution_context> context_stack; + List<LEX_STRING> db_list; SQL_LIST proc_list, auxilliary_table_list, save_list; create_field *last_field; @@ -887,6 +931,10 @@ typedef struct st_lex uint sroutines_list_own_elements; st_sp_chistics sp_chistics; + + Event_timed *et; + bool et_compile_phase; + bool only_view; /* used for SHOW CREATE TABLE/VIEW */ /* field_list was created for view and should be removed before PS/SP @@ -928,7 +976,13 @@ typedef struct st_lex Pointers to part of LOAD DATA statement that should be rewritten during replication ("LOCAL 'filename' REPLACE INTO" part). */ - uchar *fname_start, *fname_end; + const uchar *fname_start, *fname_end; + + /* + Reference to a struct that contains information in various commands + to add/create/drop/change table spaces. + */ + st_alter_tablespace *alter_tablespace_info; bool escape_used; @@ -1055,10 +1109,12 @@ struct st_lex_local: public st_lex extern void lex_init(void); extern void lex_free(void); -extern void lex_start(THD *thd, uchar *buf,uint length); +extern void lex_start(THD *thd, const uchar *buf, uint length); extern void lex_end(LEX *lex); extern int yylex(void *arg, void *yythd); extern pthread_key(LEX*,THR_LEX); #define current_lex (current_thd->lex) + +#endif diff --git a/sql/sql_list.h b/sql/sql_list.h index b2bcc4ea401..05f589a2c23 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -441,6 +441,28 @@ struct ilink }; +/* Needed to be able to have an I_List of char* strings in mysqld.cc. */ + +class i_string: public ilink +{ +public: + const char* ptr; + i_string():ptr(0) { } + i_string(const char* s) : ptr(s) {} +}; + +/* needed for linked list of two strings for replicate-rewrite-db */ +class i_string_pair: public ilink +{ +public: + const char* key; + const char* val; + i_string_pair():key(0),val(0) { } + i_string_pair(const char* key_arg, const char* val_arg) : + key(key_arg),val(val_arg) {} +}; + + template <class T> class I_List_iterator; /* diff --git a/sql/sql_load.cc b/sql/sql_load.cc index cc724c102a4..30d83370255 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -176,7 +176,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, The main thing to fix to remove this restriction is to ensure that the table is marked to be 'used for insert' in which case we should never - mark this table as as 'const table' (ie, one that has only one row). + mark this table as 'const table' (ie, one that has only one row). 
*/ if (unique_table(thd, table_list, table_list->next_global)) { @@ -192,6 +192,10 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, Field **field; for (field=table->field; *field ; field++) fields_vars.push_back(new Item_field(*field)); + /* + Since all fields are set we set all bits in the write set + */ + table->file->ha_set_all_bits_in_write_set(); table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; /* Let us also prepare SET clause, although it is probably empty @@ -204,8 +208,15 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, else { // Part field list /* TODO: use this conds for 'WITH CHECK OPTIONS' */ - if (setup_fields(thd, 0, fields_vars, 1, 0, 0) || - setup_fields(thd, 0, set_fields, 1, 0, 0) || + /* + Indicate that both variables in field list and fields in update_list + are to be included in the write set of the table. We do however set all bits + in the write set anyway since it is not allowed to specify NULLs in + LOAD DATA + */ + table->file->ha_set_all_bits_in_write_set(); + if (setup_fields(thd, 0, fields_vars, 2, 0, 0) || + setup_fields(thd, 0, set_fields, 2, 0, 0) || check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(TRUE); /* @@ -275,7 +286,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #endif if (!dirname_length(ex->file_name)) { - strxnmov(name, FN_REFLEN, mysql_real_data_home, tdb, NullS); + strxnmov(name, FN_REFLEN-1, mysql_real_data_home, tdb, NullS); (void) fn_format(name, ex->file_name, name, "", MY_RELATIVE_PATH | MY_UNPACK_FILENAME); } @@ -406,38 +417,55 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) { +#ifdef HAVE_ROW_BASED_REPLICATION /* - Make sure last block (the one which caused the error) gets logged. - This is needed because otherwise after write of - (to the binlog, not to read_info (which is a cache)) - Delete_file_log_event the bad block will remain in read_info (because - pre_read is not called at the end of the last block; remember pre_read - is called whenever a new block is read from disk). - At the end of mysql_load(), the destructor of read_info will call - end_io_cache() which will flush read_info, so we will finally have - this in the binlog: - Append_block # The last successfull block - Delete_file - Append_block # The failing block - which is nonsense. - Or could also be (for a small file) - Create_file # The failing block - which is nonsense (Delete_file is not written in this case, because: - Create_file has not been written, so Delete_file is not written, then - when read_info is destroyed end_io_cache() is called which writes - Create_file. + We need to do the job that is normally done inside + binlog_query() here, which is to ensure that the pending event + is written before tables are unlocked and before any other + events are written. We also need to update the table map + version for the binary log to mark that table maps are invalid + after this point. */ - read_info.end_io_cache(); - /* If the file was not empty, wrote_create_file is true */ - if (lf_info.wrote_create_file) + if (thd->current_stmt_binlog_row_based) + thd->binlog_flush_pending_rows_event(true); + else +#endif { - if ((info.copied || info.deleted) && !transactional_table) - write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); - else + /* + Make sure last block (the one which caused the error) gets
This is needed because otherwise after write of (to + the binlog, not to read_info (which is a cache)) + Delete_file_log_event the bad block will remain in read_info + (because pre_read is not called at the end of the last + block; remember pre_read is called whenever a new block is + read from disk). At the end of mysql_load(), the destructor + of read_info will call end_io_cache() which will flush + read_info, so we will finally have this in the binlog: + + Append_block # The last successfull block + Delete_file + Append_block # The failing block + which is nonsense. + Or could also be (for a small file) + Create_file # The failing block + which is nonsense (Delete_file is not written in this case, because: + Create_file has not been written, so Delete_file is not written, then + when read_info is destroyed end_io_cache() is called which writes + Create_file. + */ + read_info.end_io_cache(); + /* If the file was not empty, wrote_create_file is true */ + if (lf_info.wrote_create_file) { - Delete_file_log_event d(thd, db, transactional_table); - mysql_bin_log.write(&d); + if ((info.copied || info.deleted) && !transactional_table) + write_execute_load_query_log_event(thd, handle_duplicates, + ignore, transactional_table); + else + { + Delete_file_log_event d(thd, db, transactional_table); + d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; + mysql_bin_log.write(&d); + } } } } @@ -454,15 +482,32 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) { +#ifdef HAVE_ROW_BASED_REPLICATION /* - As already explained above, we need to call end_io_cache() or the last - block will be logged only after Execute_load_query_log_event (which is - wrong), when read_info is destroyed. - */ - read_info.end_io_cache(); - if (lf_info.wrote_create_file) - write_execute_load_query_log_event(thd, handle_duplicates, - ignore, transactional_table); + We need to do the job that is normally done inside + binlog_query() here, which is to ensure that the pending event + is written before tables are unlocked and before any other + events are written. We also need to update the table map + version for the binary log to mark that table maps are invalid + after this point. + */ + if (thd->current_stmt_binlog_row_based) + thd->binlog_flush_pending_rows_event(true); + else +#endif + { + /* + As already explained above, we need to call end_io_cache() or the last + block will be logged only after Execute_load_query_log_event (which is + wrong), when read_info is destroyed. + */ + read_info.end_io_cache(); + if (lf_info.wrote_create_file) + { + write_execute_load_query_log_event(thd, handle_duplicates, + ignore, transactional_table); + } + } } #endif /*!EMBEDDED_LIBRARY*/ if (transactional_table) @@ -491,6 +536,7 @@ static bool write_execute_load_query_log_event(THD *thd, (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : (ignore ? 
LOAD_DUP_IGNORE : LOAD_DUP_ERROR), transactional_table, FALSE); + e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; return mysql_bin_log.write(&e); } diff --git a/sql/sql_manager.cc b/sql/sql_manager.cc index 1d3acd1696c..f580bcb16d9 100644 --- a/sql/sql_manager.cc +++ b/sql/sql_manager.cc @@ -32,12 +32,43 @@ pthread_t manager_thread; pthread_mutex_t LOCK_manager; pthread_cond_t COND_manager; +struct handler_cb { + struct handler_cb *next; + void (*action)(void); +}; + +static struct handler_cb * volatile cb_list; + +bool mysql_manager_submit(void (*action)()) +{ + bool result= FALSE; + struct handler_cb * volatile *cb; + pthread_mutex_lock(&LOCK_manager); + cb= &cb_list; + while (*cb && (*cb)->action != action) + cb= &(*cb)->next; + if (!*cb) + { + *cb= (struct handler_cb *)my_malloc(sizeof(struct handler_cb), MYF(MY_WME)); + if (!*cb) + result= TRUE; + else + { + (*cb)->next= NULL; + (*cb)->action= action; + } + } + pthread_mutex_unlock(&LOCK_manager); + return result; +} + pthread_handler_t handle_manager(void *arg __attribute__((unused))) { int error = 0; ulong status; struct timespec abstime; bool reset_flush_time = TRUE; + struct handler_cb *cb= NULL; my_thread_init(); DBUG_ENTER("handle_manager"); @@ -68,6 +99,11 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) } status = manager_status; manager_status = 0; + if (cb == NULL) + { + cb= cb_list; + cb_list= NULL; + } pthread_mutex_unlock(&LOCK_manager); if (abort_loop) @@ -80,13 +116,13 @@ pthread_handler_t handle_manager(void *arg __attribute__((unused))) reset_flush_time = TRUE; } -#ifdef HAVE_BERKELEY_DB - if (status & MANAGER_BERKELEY_LOG_CLEANUP) + while (cb) { - berkeley_cleanup_log_files(); - status &= ~MANAGER_BERKELEY_LOG_CLEANUP; + struct handler_cb *next= cb->next; + cb->action(); + my_free((gptr)cb, MYF(0)); + cb= next; } -#endif if (status) DBUG_PRINT("error", ("manager did not handle something: %lx", status)); diff --git a/sql/sql_manager.h b/sql/sql_manager.h index 35704705820..d42deb8ff81 100644 --- a/sql/sql_manager.h +++ b/sql/sql_manager.h @@ -14,6 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_BERKELEY_DB +#ifdef WITH_BERKELEY_STORAGE_ENGINE void berkeley_cleanup_log_files(void); -#endif /* HAVE_BERKELEY_DB */ +#endif /* WITH_BERKELEY_STORAGE_ENGINE */ diff --git a/sql/sql_map.cc b/sql/sql_map.cc index 56b4b765355..8376b3bbfcc 100644 --- a/sql/sql_map.cc +++ b/sql/sql_map.cc @@ -25,10 +25,6 @@ #include <sys/mman.h> #endif -#ifndef MAP_NORESERVE -#define MAP_NORESERVE 0 // For IRIX -#endif - mapped_files::mapped_files(const my_string filename,byte *magic,uint magic_length) { #ifdef HAVE_MMAP diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index b2066953cf5..4ee47a574b1 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -16,22 +16,16 @@ #include "mysql_priv.h" #include "sql_repl.h" +#include "rpl_filter.h" #include "repl_failsafe.h" #include <m_ctype.h> #include <myisam.h> #include <my_dir.h> -#ifdef HAVE_INNOBASE_DB -#include "ha_innodb.h" -#endif - -#ifdef HAVE_NDBCLUSTER_DB -#include "ha_ndbcluster.h" -#endif - #include "sp_head.h" #include "sp.h" #include "sp_cache.h" +#include "event.h" #ifdef HAVE_OPENSSL /* @@ -74,19 +68,41 @@ static bool check_db_used(THD *thd,TABLE_LIST *tables); static bool check_multi_update_lock(THD *thd); static void remove_escape(char *name); static void refresh_status(THD *thd); -static bool append_file_to_dir(THD *thd, const char **filename_ptr, - 
const char *table_name); const char *any_db="*any*"; // Special symbol for check_access -const char *command_name[]={ - "Sleep", "Quit", "Init DB", "Query", "Field List", "Create DB", - "Drop DB", "Refresh", "Shutdown", "Statistics", "Processlist", - "Connect","Kill","Debug","Ping","Time","Delayed insert","Change user", - "Binlog Dump","Table Dump", "Connect Out", "Register Slave", - "Prepare", "Execute", "Long Data", "Close stmt", - "Reset stmt", "Set option", "Fetch", - "Error" // Last command number +LEX_STRING command_name[]={ + (char *)STRING_WITH_LEN("Sleep"), + (char *)STRING_WITH_LEN("Quit"), + (char *)STRING_WITH_LEN("Init DB"), + (char *)STRING_WITH_LEN("Query"), + (char *)STRING_WITH_LEN("Field List"), + (char *)STRING_WITH_LEN("Create DB"), + (char *)STRING_WITH_LEN("Drop DB"), + (char *)STRING_WITH_LEN("Refresh"), + (char *)STRING_WITH_LEN("Shutdown"), + (char *)STRING_WITH_LEN("Statistics"), + (char *)STRING_WITH_LEN("Processlist"), + (char *)STRING_WITH_LEN("Connect"), + (char *)STRING_WITH_LEN("Kill"), + (char *)STRING_WITH_LEN("Debug"), + (char *)STRING_WITH_LEN("Ping"), + (char *)STRING_WITH_LEN("Time"), + (char *)STRING_WITH_LEN("Delayed insert"), + (char *)STRING_WITH_LEN("Change user"), + (char *)STRING_WITH_LEN("Binlog Dump"), + (char *)STRING_WITH_LEN("Table Dump"), + (char *)STRING_WITH_LEN("Connect Out"), + (char *)STRING_WITH_LEN("Register Slave"), + (char *)STRING_WITH_LEN("Prepare"), + (char *)STRING_WITH_LEN("Execute"), + (char *)STRING_WITH_LEN("Long Data"), + (char *)STRING_WITH_LEN("Close stmt"), + (char *)STRING_WITH_LEN("Reset stmt"), + (char *)STRING_WITH_LEN("Set option"), + (char *)STRING_WITH_LEN("Fetch"), + (char *)STRING_WITH_LEN("Daemon"), + (char *)STRING_WITH_LEN("Error") // Last command number }; const char *xa_state_names[]={ @@ -155,7 +171,7 @@ static bool end_active_trans(THD *thd) DBUG_RETURN(error); } -static bool begin_trans(THD *thd) +bool begin_trans(THD *thd) { int error=0; if (unlikely(thd->in_sub_stmt)) @@ -189,7 +205,8 @@ static bool begin_trans(THD *thd) */ inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables) { - return table_rules_on && tables && !tables_ok(thd,tables); + return rpl_filter->is_on() && tables && !thd->spcont && + !rpl_filter->tables_ok(thd->db, tables); } #endif @@ -327,7 +344,7 @@ int check_user(THD *thd, enum enum_server_command command, if (opt_secure_auth_local && passwd_len == SCRAMBLE_LENGTH_323) { net_printf_error(thd, ER_NOT_SUPPORTED_AUTH_MODE); - mysql_log.write(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE)); + general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE)); DBUG_RETURN(-1); } if (passwd_len != 0 && @@ -361,9 +378,9 @@ int check_user(THD *thd, enum enum_server_command command, net_printf_error(thd, ER_SERVER_IS_IN_SECURE_AUTH_MODE, thd->main_security_ctx.user, thd->main_security_ctx.host_or_ip); - mysql_log.write(thd, COM_CONNECT, ER(ER_SERVER_IS_IN_SECURE_AUTH_MODE), - thd->main_security_ctx.user, - thd->main_security_ctx.host_or_ip); + general_log_print(thd, COM_CONNECT, ER(ER_SERVER_IS_IN_SECURE_AUTH_MODE), + thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip); DBUG_RETURN(-1); } /* We have to read very specific packet size */ @@ -411,14 +428,14 @@ int check_user(THD *thd, enum enum_server_command command, } /* Why logging is performed before all checks've passed? */ - mysql_log.write(thd, command, - (thd->main_security_ctx.priv_user == - thd->main_security_ctx.user ? 
- (char*) "%s@%s on %s" : - (char*) "%s@%s as anonymous on %s"), - thd->main_security_ctx.user, - thd->main_security_ctx.host_or_ip, - db ? db : (char*) ""); + general_log_print(thd, command, + (thd->main_security_ctx.priv_user == + thd->main_security_ctx.user ? + (char*) "%s@%s on %s" : + (char*) "%s@%s as anonymous on %s"), + thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip, + db ? db : (char*) ""); /* This is the default access rights for the current database. It's @@ -465,17 +482,17 @@ int check_user(THD *thd, enum enum_server_command command, else if (res == 2) // client gave short hash, server has long hash { net_printf_error(thd, ER_NOT_SUPPORTED_AUTH_MODE); - mysql_log.write(thd,COM_CONNECT,ER(ER_NOT_SUPPORTED_AUTH_MODE)); + general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE)); DBUG_RETURN(-1); } net_printf_error(thd, ER_ACCESS_DENIED_ERROR, thd->main_security_ctx.user, thd->main_security_ctx.host_or_ip, passwd_len ? ER(ER_YES) : ER(ER_NO)); - mysql_log.write(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR), - thd->main_security_ctx.user, - thd->main_security_ctx.host_or_ip, - passwd_len ? ER(ER_YES) : ER(ER_NO)); + general_log_print(thd, COM_CONNECT, ER(ER_ACCESS_DENIED_ERROR), + thd->main_security_ctx.user, + thd->main_security_ctx.host_or_ip, + passwd_len ? ER(ER_YES) : ER(ER_NO)); DBUG_RETURN(-1); #endif /* NO_EMBEDDED_ACCESS_CHECKS */ } @@ -648,6 +665,9 @@ void init_update_queries(void) uc_update_queries[SQLCOM_DROP_INDEX]=1; uc_update_queries[SQLCOM_CREATE_VIEW]=1; uc_update_queries[SQLCOM_DROP_VIEW]=1; + uc_update_queries[SQLCOM_CREATE_EVENT]=1; + uc_update_queries[SQLCOM_ALTER_EVENT]=1; + uc_update_queries[SQLCOM_DROP_EVENT]=1; } bool is_update_query(enum enum_sql_command command) @@ -1233,6 +1253,7 @@ pthread_handler_t handle_bootstrap(void *arg) thd->version=refresh_version; thd->security_ctx->priv_user= thd->security_ctx->user= (char*) my_strdup("boot", MYF(MY_WME)); + thd->security_ctx->priv_host[0]=0; buff= (char*) thd->net.buff; thd->init_for_queries(); @@ -1572,7 +1593,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, packet, strlen(packet), thd->charset()); if (!mysql_change_db(thd, tmp.str, FALSE)) { - mysql_log.write(thd,command,"%s",thd->db); + general_log_print(thd, command, "%s",thd->db); send_ok(thd); } break; @@ -1710,7 +1731,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (alloc_query(thd, packet, packet_length)) break; // fatal error is set char *packet_end= thd->query + thd->query_length; - mysql_log.write(thd,command,"%s",thd->query); + general_log_print(thd, command, "%s", thd->query); DBUG_PRINT("query",("%-.4096s",thd->query)); if (!(specialflag & SPECIAL_NO_PRIOR)) @@ -1766,8 +1787,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, TABLE_LIST table_list; LEX_STRING conv_name; /* Saved variable value */ - my_bool old_innodb_table_locks= - IF_INNOBASE_DB(thd->variables.innodb_table_locks, FALSE); + my_bool old_innodb_table_locks= thd->variables.innodb_table_locks; + + /* used as fields initializator */ lex_start(thd, 0, 0); @@ -1797,7 +1819,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->query_length= strlen(packet); // for simplicity: don't optimize if (!(thd->query=fields=thd->memdup(packet,thd->query_length+1))) break; - mysql_log.write(thd,command,"%s %s",table_list.table_name, fields); + general_log_print(thd, command, "%s %s", table_list.table_name, fields); if (lower_case_table_names) my_casedn_str(files_charset_info, 
table_list.table_name); remove_escape(table_list.table_name); // This can't have wildcards @@ -1826,7 +1848,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, #endif case COM_QUIT: /* We don't calculate statistics for this command */ - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); net->error=0; // Don't give 'abort' message error=TRUE; // End server break; @@ -1846,7 +1868,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } if (check_access(thd,CREATE_ACL,db,0,1,0,is_schema_db(db))) break; - mysql_log.write(thd,command,packet); + general_log_print(thd, command, packet); bzero(&create_info, sizeof(create_info)); mysql_create_db(thd, (lower_case_table_names == 2 ? alias : db), &create_info, 0); @@ -1871,7 +1893,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); break; } - mysql_log.write(thd,command,db); + general_log_print(thd, command, db); mysql_rm_db(thd, db, 0, 0); break; } @@ -1895,7 +1917,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, kill_zombie_dump_threads(slave_server_id); thd->server_id = slave_server_id; - mysql_log.write(thd, command, "Log: '%s' Pos: %ld", packet+10, + general_log_print(thd, command, "Log: '%s' Pos: %ld", packet+10, (long) pos); mysql_binlog_send(thd, thd->strdup(packet + 10), (my_off_t) pos, flags); unregister_slave(thd,1,1); @@ -1913,7 +1935,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, ulong options= (ulong) (uchar) packet[0]; if (check_global_access(thd,RELOAD_ACL)) break; - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); if (!reload_acl_and_cache(thd, options, (TABLE_LIST*) 0, ¬_used)) send_ok(thd); break; @@ -1941,7 +1963,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, break; } DBUG_PRINT("quit",("Got shutdown command for level %u", level)); - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); send_eof(thd); #ifdef __WIN__ sleep(1); // must wait after eof() @@ -1958,7 +1980,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, #endif case COM_STATISTICS: { - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); statistic_increment(thd->status_var.com_stat[SQLCOM_SHOW_STATUS], &LOCK_status); #ifndef EMBEDDED_LIBRARY @@ -1972,7 +1994,8 @@ bool dispatch_command(enum enum_server_command command, THD *thd, uptime, (int) thread_count, (ulong) thd->query_id, (ulong) thd->status_var.long_query_count, - thd->status_var.opened_tables, refresh_version, cached_tables(), + thd->status_var.opened_tables, refresh_version, + cached_open_tables(), (uptime ? (ulonglong2double(thd->query_id) / (double) uptime) : (double) 0)); #ifdef SAFEMALLOC @@ -1997,7 +2020,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (!thd->security_ctx->priv_user[0] && check_global_access(thd, PROCESS_ACL)) break; - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); mysqld_list_processes(thd, thd->security_ctx->master_access & PROCESS_ACL ? 
NullS : thd->security_ctx->priv_user, 0); @@ -2034,7 +2057,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (check_global_access(thd, SUPER_ACL)) break; /* purecov: inspected */ mysql_print_status(); - mysql_log.write(thd,command,NullS); + general_log_print(thd, command, NullS); send_eof(thd); break; case COM_SLEEP: @@ -2088,6 +2111,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, void log_slow_statement(THD *thd) { + DBUG_ENTER("log_slow_statement"); time_t start_of_query; /* @@ -2116,9 +2140,10 @@ void log_slow_statement(THD *thd) (specialflag & SPECIAL_LOG_QUERIES_NOT_USING_INDEXES))) { thd->status_var.long_query_count++; - mysql_slow_log.write(thd, thd->query, thd->query_length, start_of_query); + slow_log_print(thd, thd->query, thd->query_length, start_of_query); } } + DBUG_VOID_RETURN; } @@ -2143,6 +2168,7 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident, case SCH_TABLES: case SCH_VIEWS: case SCH_TRIGGERS: + case SCH_EVENTS: #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ @@ -2217,6 +2243,7 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident, case SCH_STATUS: case SCH_PROCEDURES: case SCH_CHARSETS: + case SCH_ENGINES: case SCH_COLLATIONS: case SCH_COLLATION_CHARACTER_SET_APPLICABILITY: case SCH_USER_PRIVILEGES: @@ -2433,11 +2460,15 @@ mysql_execute_command(THD *thd) if (all_tables) { if (lex->orig_sql_command != SQLCOM_SHOW_STATUS_PROC && - lex->orig_sql_command != SQLCOM_SHOW_STATUS_FUNC) + lex->orig_sql_command != SQLCOM_SHOW_STATUS_FUNC && + lex->orig_sql_command != SQLCOM_SHOW_EVENTS) res= check_table_access(thd, lex->exchange ? SELECT_ACL | FILE_ACL : SELECT_ACL, all_tables, 0); + else if (lex->orig_sql_command == SQLCOM_SHOW_EVENTS) + res= check_access(thd, EVENT_ACL, thd->lex->select_lex.db, 0, 0, 0, + is_schema_db(thd->lex->select_lex.db)); } else res= check_access(thd, @@ -2682,29 +2713,20 @@ mysql_execute_command(THD *thd) res = load_master_data(thd); break; #endif /* HAVE_REPLICATION */ -#ifdef HAVE_NDBCLUSTER_DB - case SQLCOM_SHOW_NDBCLUSTER_STATUS: - { - res = ndbcluster_show_status(thd); - break; - } -#endif -#ifdef HAVE_INNOBASE_DB - case SQLCOM_SHOW_INNODB_STATUS: + case SQLCOM_SHOW_ENGINE_STATUS: { if (check_global_access(thd, SUPER_ACL)) - goto error; - res = innodb_show_status(thd); + goto error; + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_STATUS); break; } - case SQLCOM_SHOW_MUTEX_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: { if (check_global_access(thd, SUPER_ACL)) goto error; - res = innodb_mutex_show_status(thd); + res = ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_MUTEX); break; } -#endif #ifdef HAVE_REPLICATION case SQLCOM_LOAD_MASTER_TABLE: { @@ -3053,7 +3075,7 @@ end_with_restore_list: } } query_cache_invalidate3(thd, first_table, 0); - if (end_active_trans(thd) || mysql_rename_tables(thd, first_table)) + if (end_active_trans(thd) || mysql_rename_tables(thd, first_table, 0)) goto error; break; } @@ -3453,13 +3475,16 @@ end_with_restore_list: case SQLCOM_SHOW_STORAGE_ENGINES: res= mysqld_show_storage_engines(thd); break; + case SQLCOM_SHOW_AUTHORS: + res= mysqld_show_authors(thd); + break; case SQLCOM_SHOW_PRIVILEGES: res= mysqld_show_privileges(thd); break; case SQLCOM_SHOW_COLUMN_TYPES: res= mysqld_show_column_types(thd); break; - case SQLCOM_SHOW_LOGS: + case SQLCOM_SHOW_ENGINE_LOGS: #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, 
ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ @@ -3468,7 +3493,7 @@ end_with_restore_list: { if (grant_option && check_access(thd, FILE_ACL, any_db,0,0,0,0)) goto error; - res= mysqld_show_logs(thd); + res= ha_show_status(thd, lex->create_info.db_type, HA_ENGINE_LOGS); break; } #endif @@ -3587,9 +3612,9 @@ end_with_restore_list: above was not called. So we have to check rules again here. */ #ifdef HAVE_REPLICATION - if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + if (thd->slave_thread && + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3622,8 +3647,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(lex->name, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(lex->name))) + (!rpl_filter->db_ok(lex->name) || + !rpl_filter->db_ok_with_wild_table(lex->name))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3640,6 +3665,48 @@ end_with_restore_list: res= mysql_rm_db(thd, lex->name, lex->drop_if_exists, 0); break; } + case SQLCOM_RENAME_DB: + { + LEX_STRING *olddb, *newdb; + List_iterator <LEX_STRING> db_list(lex->db_list); + olddb= db_list++; + newdb= db_list++; + if (end_active_trans(thd)) + { + res= 1; + break; + } +#ifdef HAVE_REPLICATION + if (thd->slave_thread && + (!rpl_filter->db_ok(olddb->str) || + !rpl_filter->db_ok(newdb->str) || + !rpl_filter->db_ok_with_wild_table(olddb->str) || + !rpl_filter->db_ok_with_wild_table(newdb->str))) + { + res= 1; + my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); + break; + } +#endif + if (check_access(thd,ALTER_ACL,olddb->str,0,1,0,is_schema_db(olddb->str)) || + check_access(thd,DROP_ACL,olddb->str,0,1,0,is_schema_db(olddb->str)) || + check_access(thd,CREATE_ACL,newdb->str,0,1,0,is_schema_db(newdb->str))) + { + res= 1; + break; + } + if (thd->locked_tables || thd->active_transaction()) + { + res= 1; + my_message(ER_LOCK_OR_ACTIVE_TRANSACTION, + ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0)); + goto error; + } + res= mysql_rename_db(thd, olddb, newdb); + if (!res) + send_ok(thd); + break; + } case SQLCOM_ALTER_DB: { char *db= lex->name ? lex->name : thd->db; @@ -3650,7 +3717,7 @@ end_with_restore_list: } if (!strip_sp(db) || check_db_name(db)) { - my_error(ER_WRONG_DB_NAME, MYF(0), lex->name); + my_error(ER_WRONG_DB_NAME, MYF(0), db); break; } /* @@ -3662,8 +3729,8 @@ end_with_restore_list: */ #ifdef HAVE_REPLICATION if (thd->slave_thread && - (!db_ok(db, replicate_do_db, replicate_ignore_db) || - !db_ok_with_wild_table(db))) + (!rpl_filter->db_ok(db) || + !rpl_filter->db_ok_with_wild_table(db))) { my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0)); break; @@ -3687,11 +3754,79 @@ end_with_restore_list: my_error(ER_WRONG_DB_NAME, MYF(0), lex->name); break; } - if (check_access(thd,SELECT_ACL,lex->name,0,1,0,is_schema_db(lex->name))) - break; res=mysqld_show_create_db(thd,lex->name,&lex->create_info); break; } + case SQLCOM_CREATE_EVENT: + case SQLCOM_ALTER_EVENT: + case SQLCOM_DROP_EVENT: + { + uint rows_affected= 1; + DBUG_ASSERT(lex->et); + do { + if (! 
lex->et->dbname.str) + { + my_message(ER_NO_DB_ERROR, ER(ER_NO_DB_ERROR), MYF(0)); + res= true; + break; + } + + if (check_access(thd, EVENT_ACL, lex->et->dbname.str, 0, 0, 0, + is_schema_db(lex->et->dbname.str))) + break; + + if (end_active_trans(thd)) + { + res= -1; + break; + } + + switch (lex->sql_command) { + case SQLCOM_CREATE_EVENT: + res= evex_create_event(thd, lex->et, (uint) lex->create_info.options, + &rows_affected); + break; + case SQLCOM_ALTER_EVENT: + res= evex_update_event(thd, lex->et, lex->spname, &rows_affected); + break; + case SQLCOM_DROP_EVENT: + res= evex_drop_event(thd, lex->et, lex->drop_if_exists, &rows_affected); + default:; + } + DBUG_PRINT("info", ("CREATE/ALTER/DROP returned error code=%d af_rows=%d", + res, rows_affected)); + if (!res) + send_ok(thd, rows_affected); + + /* lex->unit.cleanup() is called outside, no need to call it here */ + } while (0); + lex->et->free_sphead_on_delete= true; + delete lex->et; + lex->et= 0; + break; + } + case SQLCOM_SHOW_CREATE_EVENT: + { + DBUG_ASSERT(lex->spname); + DBUG_ASSERT(lex->et); + if (! lex->spname->m_db.str) + { + my_message(ER_NO_DB_ERROR, ER(ER_NO_DB_ERROR), MYF(0)); + res= true; + break; + } + if (check_access(thd, EVENT_ACL, lex->spname->m_db.str, 0, 0, 0, + is_schema_db(lex->spname->m_db.str))) + break; + + if (lex->spname->m_name.length > NAME_LEN) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), lex->spname->m_name.str); + goto error; + } + res= evex_show_create_event(thd, lex->spname, lex->et->definer); + break; + } case SQLCOM_CREATE_FUNCTION: // UDF function { if (check_access(thd,INSERT_ACL,"mysql",0,1,0,0)) @@ -3723,8 +3858,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3741,8 +3876,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3759,8 +3894,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3775,8 +3910,8 @@ end_with_restore_list: { if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -3855,8 +3990,8 @@ end_with_restore_list: if (!res && mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } } else @@ -3875,8 +4010,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } if (lex->sql_command == SQLCOM_GRANT) { @@ -4172,12 +4307,12 @@ end_with_restore_list: db, name, lex->sql_command == SQLCOM_CREATE_PROCEDURE, 1)) { - close_thread_tables(thd); if 
(sp_grant_privileges(thd, db, name, lex->sql_command == SQLCOM_CREATE_PROCEDURE)) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_PROC_AUTO_GRANT_FAIL, ER(ER_PROC_AUTO_GRANT_FAIL)); + close_thread_tables(thd); } #endif send_ok(thd); @@ -4395,8 +4530,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); break; @@ -4480,8 +4615,8 @@ end_with_restore_list: if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); break; @@ -4605,8 +4740,8 @@ end_with_restore_list: buff.append(STRING_WITH_LEN(" AS ")); buff.append(first_table->source.str, first_table->source.length); - Query_log_event qinfo(thd, buff.ptr(), buff.length(), 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + buff.ptr(), buff.length(), FALSE, FALSE); } break; } @@ -4619,8 +4754,8 @@ end_with_restore_list: mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } break; } @@ -4814,6 +4949,30 @@ end_with_restore_list: case SQLCOM_XA_RECOVER: res= mysql_xa_recover(thd); break; + case SQLCOM_ALTER_TABLESPACE: + if (check_access(thd, ALTER_ACL, thd->db, 0, 1, 0, thd->db ? is_schema_db(thd->db) : 0)) + break; + if (!(res= mysql_alter_tablespace(thd, lex->alter_tablespace_info))) + send_ok(thd); + break; + case SQLCOM_INSTALL_PLUGIN: + if (! (res= mysql_install_plugin(thd, &thd->lex->comment, + &thd->lex->ident))) + send_ok(thd); + break; + case SQLCOM_UNINSTALL_PLUGIN: + if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment))) + send_ok(thd); + break; + case SQLCOM_BINLOG_BASE64_EVENT: + { +#ifndef EMBEDDED_LIBRARY + mysql_client_binlog_statement(thd); +#else /* EMBEDDED_LIBRARY */ + my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "embedded"); +#endif /* EMBEDDED_LIBRARY */ + break; + } default: #ifndef EMBEDDED_LIBRARY DBUG_ASSERT(0); /* Impossible */ @@ -4822,9 +4981,9 @@ end_with_restore_list: break; } thd->proc_info="query end"; - /* Two binlog-related cleanups: */ /* + Binlog-related cleanup: Reset system variables temporarily modified by SET ONE SHOT. Exception: If this is a SET, do nothing. This is to allow @@ -4835,6 +4994,7 @@ end_with_restore_list: */ if (thd->one_shot_set && lex->sql_command != SQLCOM_SET_OPTION) reset_one_shot_variables(thd); + thd->reset_current_stmt_binlog_row_based(); /* The return value for ROW_COUNT() is "implementation dependent" if the @@ -5557,7 +5717,6 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } - /* When you modify mysql_parse(), you may need to modify mysql_test_parse_for_slave() in this same file.
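
The recurring conversion in the sql_parse.cc hunks above, replacing a hand-built Query_log_event plus mysql_bin_log.write() with a single thd->binlog_query() call, is the patch's central binlogging change: binlog_query() chooses between statement- and row-based logging and takes care of any pending row events. A minimal sketch of the converted call-site shape, using only names visible in this patch; the wrapper function log_statement_sketch is invented for illustration and is not part of the patch:

  #include "mysql_priv.h"  /* THD, mysql_bin_log (server-internal header) */

  /* Hypothetical wrapper showing the pattern the patch introduces. */
  static void log_statement_sketch(THD *thd)
  {
    if (mysql_bin_log.is_open())
    {
      thd->clear_error();                       /* Log the statement with error code 0 */
      thd->binlog_query(THD::MYSQL_QUERY_TYPE,  /* Query type, as at the call sites above */
                        thd->query, thd->query_length,
                        FALSE,                  /* is_trans: not a transactional table */
                        FALSE);                 /* suppress_use: keep the USE `db` prefix */
    }
  }
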
@@ -5592,6 +5751,12 @@ void mysql_parse(THD *thd, char *inBuf, uint length) delete thd->lex->sphead; thd->lex->sphead= NULL; } + if (thd->lex->et) + { + thd->lex->et->free_sphead_on_delete= true; + delete thd->lex->et; + thd->lex->et= NULL; + } } else { @@ -5627,6 +5792,12 @@ void mysql_parse(THD *thd, char *inBuf, uint length) delete thd->lex->sphead; thd->lex->sphead= NULL; } + if (thd->lex->et) + { + thd->lex->et->free_sphead_on_delete= true; + delete thd->lex->et; + thd->lex->et= NULL; + } } thd->proc_info="freeing items"; thd->end_statement(); @@ -5750,10 +5921,7 @@ bool add_field_to_list(THD *thd, char *field_name, enum_field_types type, */ char buf[32]; my_snprintf(buf, sizeof(buf), "TIMESTAMP(%s)", length); - push_warning_printf(thd,MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_DEPRECATED_SYNTAX, - ER(ER_WARN_DEPRECATED_SYNTAX), - buf, "TIMESTAMP"); + WARN_DEPRECATED(thd, "5.2", buf, "'TIMESTAMP'"); } if (!(new_field= new create_field()) || @@ -5882,12 +6050,16 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, if (!table) DBUG_RETURN(0); // End of memory alias_str= alias ? alias->str : table->table.str; - if (check_table_name(table->table.str,table->table.length) || - table->db.str && check_db_name(table->db.str)) + if (check_table_name(table->table.str,table->table.length)) { my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str); DBUG_RETURN(0); } + if (table->db.str && check_db_name(table->db.str)) + { + my_error(ER_WRONG_DB_NAME, MYF(0), table->db.str); + DBUG_RETURN(0); + } if (!alias) /* Alias is case sensitive */ { @@ -6442,7 +6614,8 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, { /* Flush the normal query log, the update log, the binary log, - the slow query log, and the relay log (if it exists). + the slow query log, the relay log (if it exists) and the log + tables. */ /* @@ -6452,15 +6625,17 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables, than it would help them) */ tmp_write_to_binlog= 0; - mysql_log.new_file(1); - mysql_slow_log.new_file(1); mysql_bin_log.rotate_and_purge(RP_FORCE_ROTATE); #ifdef HAVE_REPLICATION pthread_mutex_lock(&LOCK_active_mi); rotate_relay_log(active_mi); pthread_mutex_unlock(&LOCK_active_mi); #endif - if (ha_flush_logs()) + + /* flush slow and general logs */ + logger.flush_logs(thd); + + if (ha_flush_logs(NULL)) result=1; if (flush_error_log()) result=1; @@ -6584,6 +6759,8 @@ void kill_one_thread(THD *thd, ulong id, bool only_kill_query) I_List_iterator<THD> it(threads); while ((tmp=it++)) { + if (tmp->command == COM_DAEMON) + continue; if (tmp->thread_id == id) { pthread_mutex_lock(&tmp->LOCK_delete); // Lock from delete @@ -6621,11 +6798,10 @@ static void refresh_status(THD *thd) add_to_status(&global_status_var, &thd->status_var); bzero((char*) &thd->status_var, sizeof(thd->status_var)); - for (struct show_var_st *ptr=status_vars; ptr->name; ptr++) - { - if (ptr->type == SHOW_LONG) + for (SHOW_VAR *ptr= status_vars; ptr->name; ptr++) + if (ptr->type == SHOW_LONG) // note that SHOW_LONG_NOFLUSH variables are not reset *(ulong*) ptr->value= 0; - } + /* Reset the counters of all key caches (default and named). 
*/ process_key_caches(reset_key_cache_counters); pthread_mutex_unlock(&LOCK_status); @@ -6634,8 +6810,8 @@ static void refresh_status(THD *thd) /* If pointer is not a null pointer, append filename to it */ -static bool append_file_to_dir(THD *thd, const char **filename_ptr, - const char *table_name) +bool append_file_to_dir(THD *thd, const char **filename_ptr, + const char *table_name) { char buff[FN_REFLEN],*ptr, *end; if (!*filename_ptr) @@ -6775,7 +6951,7 @@ bool mysql_create_index(THD *thd, TABLE_LIST *table_list, List<Key> &keys) HA_CREATE_INFO create_info; DBUG_ENTER("mysql_create_index"); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; DBUG_RETURN(mysql_alter_table(thd,table_list->db,table_list->table_name, &create_info, table_list, @@ -6791,7 +6967,7 @@ bool mysql_drop_index(THD *thd, TABLE_LIST *table_list, ALTER_INFO *alter_info) HA_CREATE_INFO create_info; DBUG_ENTER("mysql_drop_index"); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.default_table_charset= thd->variables.collation_database; alter_info->clear(); alter_info->flags= ALTER_DROP_INDEX; diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc new file mode 100644 index 00000000000..c98f8f915b9 --- /dev/null +++ b/sql/sql_partition.cc @@ -0,0 +1,5832 @@ +/* Copyright (C) 2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + This file is a container for general functionality related + to partitioning introduced in MySQL version 5.1. It contains functionality + used by all handlers that support partitioning, such as + the partitioning handler itself and the NDB handler. + + The first version was written by Mikael Ronstrom. + + This version supports RANGE partitioning, LIST partitioning, HASH + partitioning and composite partitioning (hereafter called subpartitioning) + where each RANGE/LIST partitioning is HASH partitioned. The hash function + can either be supplied by the user or by only a list of fields (also + called KEY partitioning), where the MySQL server will use an internal + hash function. + There are quite a few defaults that can be used as well. 
+*/ + +/* Some general useful functions */ + +#include "mysql_priv.h" +#include <errno.h> +#include <m_ctype.h> +#include "md5.h" + +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" +/* + Partition related functions declarations and some static constants; +*/ +const LEX_STRING partition_keywords[]= +{ + { (char *) STRING_WITH_LEN("HASH") }, + { (char *) STRING_WITH_LEN("RANGE") }, + { (char *) STRING_WITH_LEN("LIST") }, + { (char *) STRING_WITH_LEN("KEY") }, + { (char *) STRING_WITH_LEN("MAXVALUE") }, + { (char *) STRING_WITH_LEN("LINEAR ") } +}; +static const char *part_str= "PARTITION"; +static const char *sub_str= "SUB"; +static const char *by_str= "BY"; +static const char *space_str= " "; +static const char *equal_str= "="; +static const char *end_paren_str= ")"; +static const char *begin_paren_str= "("; +static const char *comma_str= ","; +static char buff[22]; + +int get_partition_id_list(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_range(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_hash_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_key_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_linear_hash_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_linear_key_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_range_sub_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_range_sub_key(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_range_sub_linear_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_range_sub_linear_key(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_list_sub_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_list_sub_key(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_list_sub_linear_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +int get_partition_id_list_sub_linear_key(partition_info *part_info, + uint32 *part_id, + longlong *func_value); +uint32 get_partition_id_hash_sub(partition_info *part_info); +uint32 get_partition_id_key_sub(partition_info *part_info); +uint32 get_partition_id_linear_hash_sub(partition_info *part_info); +uint32 get_partition_id_linear_key_sub(partition_info *part_info); +#endif + +static uint32 get_next_partition_via_walking(PARTITION_ITERATOR*); +static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR*); +uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter); +uint32 get_next_partition_id_list(PARTITION_ITERATOR* part_iter); +int get_part_iter_for_interval_via_mapping(partition_info *part_info, + bool is_subpart, + char *min_value, char *max_value, + uint flags, + PARTITION_ITERATOR *part_iter); +int get_part_iter_for_interval_via_walking(partition_info *part_info, + bool is_subpart, + char *min_value, char *max_value, + uint flags, + PARTITION_ITERATOR *part_iter); +static void set_up_range_analysis_info(partition_info *part_info); + +/* + A routine used by the parser to decide whether we are specifying a full + partitioning or if only partitions to add or to split. 
+ + SYNOPSIS + is_partition_management() + lex Reference to the lex object + + RETURN VALUE + TRUE Yes, it is part of a partition management command + FALSE No, not a partition management command + + DESCRIPTION + This needs to be outside of WITH_PARTITION_STORAGE_ENGINE since it is + used from the sql parser that doesn't have any #ifdef's +*/ + +my_bool is_partition_management(LEX *lex) +{ + return (lex->sql_command == SQLCOM_ALTER_TABLE && + (lex->alter_info.flags == ALTER_ADD_PARTITION || + lex->alter_info.flags == ALTER_REORGANIZE_PARTITION)); +} + +#ifdef WITH_PARTITION_STORAGE_ENGINE +/* + A support function to check if a name is in a list of strings + + SYNOPSIS + is_name_in_list() + name String searched for + list_names A list of names searched in + + RETURN VALUES + TRUE String found + FALSE String not found +*/ + +bool is_name_in_list(char *name, + List<char> list_names) +{ + List_iterator<char> names_it(list_names); + uint no_names= list_names.elements; + uint i= 0; + + do + { + char *list_name= names_it++; + if (!(my_strcasecmp(system_charset_info, name, list_name))) + return TRUE; + } while (++i < no_names); + return FALSE; +} + + + +/* + Set-up defaults for partitions. + + SYNOPSIS + partition_default_handling() + table Table object + part_info Partition info to set up + normalized_path Normalized path name of table, used when getting no_parts + + RETURN VALUES + TRUE Error + FALSE Success +*/ + +bool partition_default_handling(TABLE *table, partition_info *part_info, + const char *normalized_path) +{ + DBUG_ENTER("partition_default_handling"); + + if (part_info->use_default_no_partitions) + { + if (table->file->get_no_parts(normalized_path, &part_info->no_parts)) + { + DBUG_RETURN(TRUE); + } + } + else if (part_info->is_sub_partitioned() && + part_info->use_default_no_subpartitions) + { + uint no_parts; + if (table->file->get_no_parts(normalized_path, &no_parts)) + { + DBUG_RETURN(TRUE); + } + DBUG_ASSERT(part_info->no_parts > 0); + part_info->no_subparts= no_parts / part_info->no_parts; + DBUG_ASSERT((no_parts % part_info->no_parts) == 0); + } + part_info->set_up_defaults_for_partitioning(table->file, + (ulonglong)0, (uint)0); + DBUG_RETURN(FALSE); +} + + +/* + Check that the reorganized table will not have duplicate partitions. + + SYNOPSIS + check_reorganise_list() + new_part_info New partition info + old_part_info Old partition info + list_part_names The list of partition names that will go away and + can be reused in the new table. + + RETURN VALUES + TRUE Unacceptable name conflict detected. + FALSE New names are OK. + + DESCRIPTION + Can handle the case where 'new_part_info' and 'old_part_info' are the + same, in which case it checks that the list of names in the partitions + doesn't contain any duplicated names.
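The rule being documented can be modelled in a few lines of standalone C: a new partition name may only collide with an existing one when that existing name is in the list of partitions being replaced. The sketch below uses plain strcmp where the server uses my_strcasecmp with system_charset_info, and it omits the self-comparison case the real routine also handles.

#include <string.h>

static int name_in_list_model(const char *name, const char **list,
                              unsigned n)
{
  unsigned i;
  for (i= 0; i < n; i++)
    if (strcmp(name, list[i]) == 0)
      return 1;
  return 0;
}

/* Returns 1 on an unacceptable clash, 0 when the new names are OK. */
static int reorganise_names_clash(const char **new_names, unsigned n_new,
                                  const char **old_names, unsigned n_old,
                                  const char **reused, unsigned n_reused)
{
  unsigned i, j;
  for (i= 0; i < n_new; i++)
    for (j= 0; j < n_old; j++)
      if (strcmp(new_names[i], old_names[j]) == 0 &&
          !name_in_list_model(old_names[j], reused, n_reused))
        return 1;
  return 0;
}

So ALTER TABLE t1 REORGANIZE PARTITION p1 INTO (PARTITION p1 VALUES LESS THAN (10)) is acceptable, since p1 is itself being replaced, while reusing the name of a surviving partition is not.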
+*/ + +bool check_reorganise_list(partition_info *new_part_info, + partition_info *old_part_info, + List<char> list_part_names) +{ + uint new_count, old_count; + uint no_new_parts= new_part_info->partitions.elements; + uint no_old_parts= old_part_info->partitions.elements; + List_iterator<partition_element> new_parts_it(new_part_info->partitions); + bool same_part_info= (new_part_info == old_part_info); + DBUG_ENTER("check_reorganise_list"); + + new_count= 0; + do + { + List_iterator<partition_element> old_parts_it(old_part_info->partitions); + char *new_name= (new_parts_it++)->partition_name; + new_count++; + old_count= 0; + do + { + char *old_name= (old_parts_it++)->partition_name; + old_count++; + if (same_part_info && old_count == new_count) + break; + if (!(my_strcasecmp(system_charset_info, old_name, new_name))) + { + if (!is_name_in_list(old_name, list_part_names)) + DBUG_RETURN(TRUE); + } + } while (old_count < no_old_parts); + } while (new_count < no_new_parts); + DBUG_RETURN(FALSE); +} + + +/* + A useful routine used by update_row for partition handlers to calculate + the partition ids of the old and the new record. + + SYNOPSIS + get_part_for_update() + old_data Buffer of old record + new_data Buffer of new record + rec0 Reference to table->record[0] + part_info Reference to partition information + out:old_part_id The returned partition id of old record + out:new_part_id The returned partition id of new record + + RETURN VALUE + 0 Success + > 0 Error code +*/ + +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id, + longlong *new_func_value) +{ + Field **part_field_array= part_info->full_part_field_array; + int error; + longlong old_func_value; + DBUG_ENTER("get_parts_for_update"); + + DBUG_ASSERT(new_data == rec0); + set_field_ptr(part_field_array, old_data, rec0); + error= part_info->get_partition_id(part_info, old_part_id, + &old_func_value); + set_field_ptr(part_field_array, rec0, old_data); + if (unlikely(error)) // Should never happen + { + DBUG_ASSERT(0); + DBUG_RETURN(error); + } +#ifdef NOT_NEEDED + if (new_data == rec0) +#endif + { + if (unlikely(error= part_info->get_partition_id(part_info, + new_part_id, + new_func_value))) + { + DBUG_RETURN(error); + } + } +#ifdef NOT_NEEDED + else + { + /* + This branch should never execute but it is written anyways for + future use. It will be tested by ensuring that the above + condition is false in one test situation before pushing the code. + */ + set_field_ptr(part_field_array, new_data, rec0); + error= part_info->get_partition_id(part_info, new_part_id, + new_func_value); + set_field_ptr(part_field_array, rec0, new_data); + if (unlikely(error)) + { + DBUG_RETURN(error); + } + } +#endif + DBUG_RETURN(0); +} + + +/* + A useful routine used by delete_row for partition handlers to calculate + the partition id. + + SYNOPSIS + get_part_for_delete() + buf Buffer of old record + rec0 Reference to table->record[0] + part_info Reference to partition information + out:part_id The returned partition id to delete from + + RETURN VALUE + 0 Success + > 0 Error code + + DESCRIPTION + Dependent on whether buf is not record[0] we need to prepare the + fields. Then we call the function pointer get_partition_id to + calculate the partition id. 
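Taken together, get_parts_for_update and get_part_for_delete let a partition handler route row operations. A standalone model of the routing decision for updates; row_model and the modulo function are stand-ins for the record buffers and for part_info->get_partition_id.

struct row_model { long long key; };

static unsigned model_part_id(const struct row_model *r, unsigned no_parts)
{
  /* Stand-in partition function; the server calls
     part_info->get_partition_id() here. */
  return (unsigned) (((unsigned long long) r->key) % no_parts);
}

static void model_update_row(const struct row_model *old_row,
                             const struct row_model *new_row,
                             unsigned no_parts,
                             unsigned *old_part, unsigned *new_part)
{
  *old_part= model_part_id(old_row, no_parts);
  *new_part= model_part_id(new_row, no_parts);
  /* If the ids differ the update must be re-routed by the caller as a
     delete in *old_part plus a write in *new_part; otherwise it is an
     in-place update within one partition. */
}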
+*/ + +int get_part_for_delete(const byte *buf, const byte *rec0, + partition_info *part_info, uint32 *part_id) +{ + int error; + longlong func_value; + DBUG_ENTER("get_part_for_delete"); + + if (likely(buf == rec0)) + { + if (unlikely((error= part_info->get_partition_id(part_info, part_id, + &func_value)))) + { + DBUG_RETURN(error); + } + DBUG_PRINT("info", ("Delete from partition %d", *part_id)); + } + else + { + Field **part_field_array= part_info->full_part_field_array; + set_field_ptr(part_field_array, buf, rec0); + error= part_info->get_partition_id(part_info, part_id, &func_value); + set_field_ptr(part_field_array, rec0, buf); + if (unlikely(error)) + { + DBUG_RETURN(error); + } + DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id)); + } + DBUG_RETURN(0); +} + + +/* + This routine allocates an array for all range constants to achieve a fast + check of what partition a certain value belongs to. At the same time it + also checks that the range constants are defined in increasing order and + that the expressions are constant integer expressions. + + SYNOPSIS + check_range_constants() + part_info Partition info + + RETURN VALUE + TRUE An error occurred during creation of range constants + FALSE Successful creation of range constant mapping + + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we come too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for RANGE PARTITIONed tables. +*/ + +static bool check_range_constants(partition_info *part_info) +{ + partition_element* part_def; + longlong current_largest_int= LONGLONG_MIN; + longlong part_range_value_int; + uint no_parts= part_info->no_parts; + uint i; + List_iterator<partition_element> it(part_info->partitions); + bool result= TRUE; + DBUG_ENTER("check_range_constants"); + DBUG_PRINT("enter", ("INT_RESULT with %d parts", no_parts)); + + part_info->part_result_type= INT_RESULT; + part_info->range_int_array= + (longlong*)sql_alloc(no_parts * sizeof(longlong)); + if (unlikely(part_info->range_int_array == NULL)) + { + mem_alloc_error(no_parts * sizeof(longlong)); + goto end; + } + i= 0; + do + { + part_def= it++; + if ((i != (no_parts - 1)) || !part_info->defined_max_value) + part_range_value_int= part_def->range_value; + else + part_range_value_int= LONGLONG_MAX; + if (likely(current_largest_int < part_range_value_int)) + { + current_largest_int= part_range_value_int; + part_info->range_int_array[i]= part_range_value_int; + } + else + { + my_error(ER_RANGE_NOT_INCREASING_ERROR, MYF(0)); + goto end; + } + } while (++i < no_parts); + result= FALSE; +end: + DBUG_RETURN(result); +} + + +/* + A support routine for check_list_constants used by qsort to sort the + constant list expressions. + + SYNOPSIS + list_part_cmp() + a First list constant to compare with + b Second list constant to compare with + + RETURN VALUE + +1 a > b + 0 a == b + -1 a < b +*/ + +static int list_part_cmp(const void* a, const void* b) +{ + longlong a1= ((LIST_PART_ENTRY*)a)->list_value; + longlong b1= ((LIST_PART_ENTRY*)b)->list_value; + if (a1 < b1) + return -1; + else if (a1 > b1) + return +1; + else + return 0; +} + + +/* + This routine allocates an array for all list constants to achieve a fast + check of what partition a certain value belongs to. At the same time it + also checks that there are no duplicates among the list constants and + that the list expressions are constant integer expressions.
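check_range_constants builds range_int_array precisely so that the per-row lookup can be a binary search: a row belongs to the first partition whose bound is strictly greater than its value. A standalone model of that search, with invented bounds; the MAXVALUE partition stores LONGLONG_MAX, as in the code above.

#include <assert.h>

#define MODEL_LONGLONG_MAX 0x7fffffffffffffffLL   /* MAXVALUE sentinel */

/* Returns the owning partition, or -1 when no bound exceeds the value. */
static int find_range_partition(const long long *bounds, unsigned no_parts,
                                long long value)
{
  unsigned lo= 0, hi= no_parts;
  while (lo < hi)
  {
    unsigned mid= (lo + hi) / 2;
    if (bounds[mid] <= value)          /* value not < bound: look right */
      lo= mid + 1;
    else
      hi= mid;
  }
  return lo < no_parts ? (int) lo : -1;
}

int main()
{
  long long bounds[]= { 10, 100, MODEL_LONGLONG_MAX }; /* p0, p1, MAXVALUE */
  assert(find_range_partition(bounds, 3, 5)   == 0);
  assert(find_range_partition(bounds, 3, 10)  == 1);   /* 10 is not < 10 */
  assert(find_range_partition(bounds, 3, 999) == 2);
  return 0;
}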
+ + SYNOPSIS + check_list_constants() + part_info Partition info + + RETURN VALUE + TRUE An error occurred during creation of list constants + FALSE Successful creation of list constant mapping + + DESCRIPTION + This routine is called from check_partition_info to get a quick error + before we came too far into the CREATE TABLE process. It is also called + from fix_partition_func every time we open the .frm file. It is only + called for LIST PARTITIONed tables. +*/ + +static bool check_list_constants(partition_info *part_info) +{ + uint i, no_parts; + uint no_list_values= 0; + uint list_index= 0; + longlong *list_value; + bool not_first; + bool result= TRUE; + longlong curr_value, prev_value; + partition_element* part_def; + List_iterator<partition_element> list_func_it(part_info->partitions); + DBUG_ENTER("check_list_constants"); + + part_info->part_result_type= INT_RESULT; + + /* + We begin by calculating the number of list values that have been + defined in the first step. + + We use this number to allocate a properly sized array of structs + to keep the partition id and the value to use in that partition. + In the second traversal we assign them values in the struct array. + + Finally we sort the array of structs in order of values to enable + a quick binary search for the proper value to discover the + partition id. + After sorting the array we check that there are no duplicates in the + list. + */ + + no_parts= part_info->no_parts; + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it1(part_def->list_val_list); + while (list_val_it1++) + no_list_values++; + } while (++i < no_parts); + list_func_it.rewind(); + part_info->no_list_values= no_list_values; + part_info->list_array= + (LIST_PART_ENTRY*)sql_alloc(no_list_values*sizeof(LIST_PART_ENTRY)); + if (unlikely(part_info->list_array == NULL)) + { + mem_alloc_error(no_list_values * sizeof(LIST_PART_ENTRY)); + goto end; + } + + i= 0; + do + { + part_def= list_func_it++; + List_iterator<longlong> list_val_it2(part_def->list_val_list); + while ((list_value= list_val_it2++)) + { + part_info->list_array[list_index].list_value= *list_value; + part_info->list_array[list_index++].partition_id= i; + } + } while (++i < no_parts); + + qsort((void*)part_info->list_array, no_list_values, + sizeof(LIST_PART_ENTRY), &list_part_cmp); + + not_first= FALSE; + i= prev_value= 0; //prev_value initialised to quiet compiler + do + { + curr_value= part_info->list_array[i].list_value; + if (likely(!not_first || prev_value != curr_value)) + { + prev_value= curr_value; + not_first= TRUE; + } + else + { + my_error(ER_MULTIPLE_DEF_CONST_IN_LIST_PART_ERROR, MYF(0)); + goto end; + } + } while (++i < no_list_values); + result= FALSE; +end: + DBUG_RETURN(result); +} + + + + + + + +/* + Check that all partitions use the same storage engine. + This is currently a limitation in this version. + + SYNOPSIS + check_engine_mix() + engine_array An array of engine identifiers + no_parts Total number of partitions + + RETURN VALUE + TRUE Error, mixed engines + FALSE Ok, no mixed engines + DESCRIPTION + Current check verifies only that all handlers are the same. + Later this check will be more sophisticated. 
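The sorted array that check_list_constants produces serves two purposes: adjacent duplicates betray an invalid definition, and the per-row lookup in get_partition_id_list becomes a binary search. A standalone model of both steps; the entry struct and values are invented.

#include <stdlib.h>

struct list_entry_model { long long value; unsigned part_id; };

static int cmp_entry(const void *a, const void *b)
{
  long long a1= ((const struct list_entry_model*) a)->value;
  long long b1= ((const struct list_entry_model*) b)->value;
  return a1 < b1 ? -1 : (a1 > b1 ? 1 : 0);
}

/* Returns 1 on duplicate list constants (an error), 0 otherwise. */
static int sort_and_check(struct list_entry_model *arr, unsigned n)
{
  unsigned i;
  qsort(arr, n, sizeof(struct list_entry_model), cmp_entry);
  for (i= 1; i < n; i++)
    if (arr[i].value == arr[i - 1].value)
      return 1;
  return 0;
}

/* Binary search; returns the partition id or -1 for an unlisted value. */
static int list_lookup(const struct list_entry_model *arr, unsigned n,
                       long long v)
{
  unsigned lo= 0, hi= n;
  while (lo < hi)
  {
    unsigned mid= (lo + hi) / 2;
    if (arr[mid].value < v)
      lo= mid + 1;
    else
      hi= mid;
  }
  return (lo < n && arr[lo].value == v) ? (int) arr[lo].part_id : -1;
}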
+*/ + +static bool check_engine_mix(handlerton **engine_array, uint no_parts) +{ + uint i= 0; + bool result= FALSE; + DBUG_ENTER("check_engine_mix"); + + do + { + if (engine_array[i] != engine_array[0]) + { + result= TRUE; + break; + } + } while (++i < no_parts); + DBUG_RETURN(result); +} + + +/* + This code is used early in the CREATE TABLE and ALTER TABLE process. + + SYNOPSIS + check_partition_info() + part_info The reference to all partition information + file A reference to a handler of the table + max_rows Maximum number of rows stored in the table + engine_type Return value for used engine in partitions + + RETURN VALUE + TRUE Error, something went wrong + FALSE Ok, full partition data structures are now generated + + DESCRIPTION + We will check that the partition info requested is possible to set-up in + this version. This routine is an extension of the parser one could say. + If defaults were used we will generate default data structures for all + partitions. + +*/ + +bool check_partition_info(partition_info *part_info,handlerton **eng_type, + handler *file, ulonglong max_rows) +{ + handlerton **engine_array= NULL; + uint part_count= 0; + uint i, no_parts, tot_partitions; + bool result= TRUE; + char *same_name; + DBUG_ENTER("check_partition_info"); + + if (unlikely(part_info->is_sub_partitioned() && + (!(part_info->part_type == RANGE_PARTITION || + part_info->part_type == LIST_PARTITION)))) + { + /* Only RANGE and LIST partitioning can be subpartitioned */ + my_error(ER_SUBPARTITION_ERROR, MYF(0)); + goto end; + } + if (unlikely(part_info->set_up_defaults_for_partitioning(file, + max_rows, + (uint)0))) + goto end; + tot_partitions= part_info->get_tot_partitions(); + if (unlikely(tot_partitions > MAX_PARTITIONS)) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + goto end; + } + if ((same_name= part_info->has_unique_names())) + { + my_error(ER_SAME_NAME_PARTITION, MYF(0), same_name); + goto end; + } + engine_array= (handlerton**)my_malloc(tot_partitions * sizeof(handlerton *), + MYF(MY_WME)); + if (unlikely(!engine_array)) + goto end; + i= 0; + no_parts= part_info->no_parts; + { + List_iterator<partition_element> part_it(part_info->partitions); + do + { + partition_element *part_elem= part_it++; + if (!part_info->is_sub_partitioned()) + { + if (part_elem->engine_type == NULL) + part_elem->engine_type= part_info->default_engine_type; + DBUG_PRINT("info", ("engine = %d", + ha_legacy_type(part_elem->engine_type))); + engine_array[part_count++]= part_elem->engine_type; + } + else + { + uint j= 0, no_subparts= part_info->no_subparts;; + List_iterator<partition_element> sub_it(part_elem->subpartitions); + do + { + part_elem= sub_it++; + if (part_elem->engine_type == NULL) + part_elem->engine_type= part_info->default_engine_type; + DBUG_PRINT("info", ("engine = %u", + ha_legacy_type(part_elem->engine_type))); + engine_array[part_count++]= part_elem->engine_type; + } while (++j < no_subparts); + } + } while (++i < part_info->no_parts); + } + if (unlikely(check_engine_mix(engine_array, part_count))) + { + my_error(ER_MIX_HANDLER_ERROR, MYF(0)); + goto end; + } + + if (eng_type) + *eng_type= (handlerton*)engine_array[0]; + + /* + We need to check all constant expressions that they are of the correct + type and that they are increasing for ranges and not overlapping for + list constants. 
+ */ + + if (unlikely((part_info->part_type == RANGE_PARTITION && + check_range_constants(part_info)) || + (part_info->part_type == LIST_PARTITION && + check_list_constants(part_info)))) + goto end; + result= FALSE; +end: + my_free((char*)engine_array,MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(result); +} + + +/* + This method is used to set-up both partition and subpartitioning + field array and used for all types of partitioning. + It is part of the logic around fix_partition_func. + + SYNOPSIS + set_up_field_array() + table TABLE object for which partition fields are set-up + sub_part Is the table subpartitioned as well + + RETURN VALUE + TRUE Error, some field didn't meet requirements + FALSE Ok, partition field array set-up + + DESCRIPTION + + A great number of functions below here is part of the fix_partition_func + method. It is used to set up the partition structures for execution from + openfrm. It is called at the end of the openfrm when the table struct has + been set-up apart from the partition information. + It involves: + 1) Setting arrays of fields for the partition functions. + 2) Setting up binary search array for LIST partitioning + 3) Setting up array for binary search for RANGE partitioning + 4) Setting up key_map's to assist in quick evaluation whether one + can deduce anything from a given index of what partition to use + 5) Checking whether a set of partitions can be derived from a range on + a field in the partition function. + As part of doing this there is also a great number of error controls. + This is actually the place where most of the things are checked for + partition information when creating a table. + Things that are checked includes + 1) All fields of partition function in Primary keys and unique indexes + (if not supported) + + + Create an array of partition fields (NULL terminated). Before this method + is called fix_fields or find_table_in_sef has been called to set + GET_FIXED_FIELDS_FLAG on all fields that are part of the partition + function. +*/ + +static bool set_up_field_array(TABLE *table, + bool is_sub_part) +{ + Field **ptr, *field, **field_array; + uint no_fields= 0; + uint size_field_array; + uint i= 0; + partition_info *part_info= table->part_info; + int result= FALSE; + DBUG_ENTER("set_up_field_array"); + + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + no_fields++; + } + if (no_fields == 0) + { + /* + We are using hidden key as partitioning field + */ + DBUG_ASSERT(!is_sub_part); + DBUG_RETURN(result); + } + size_field_array= (no_fields+1)*sizeof(Field*); + field_array= (Field**)sql_alloc(size_field_array); + if (unlikely(!field_array)) + { + mem_alloc_error(size_field_array); + result= TRUE; + } + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & GET_FIXED_FIELDS_FLAG) + { + field->flags&= ~GET_FIXED_FIELDS_FLAG; + field->flags|= FIELD_IN_PART_FUNC_FLAG; + if (likely(!result)) + { + field_array[i++]= field; + + /* + We check that the fields are proper. It is required for each + field in a partition function to: + 1) Not be a BLOB of any type + A BLOB takes too long time to evaluate so we don't want it for + performance reasons. 
+ */ + + if (unlikely(field->flags & BLOB_FLAG)) + { + my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0)); + result= TRUE; + } + } + } + } + field_array[no_fields]= 0; + if (!is_sub_part) + { + part_info->part_field_array= field_array; + part_info->no_part_fields= no_fields; + } + else + { + part_info->subpart_field_array= field_array; + part_info->no_subpart_fields= no_fields; + } + DBUG_RETURN(result); +} + + +/* + Create a field array including all fields of both the partitioning and the + subpartitioning functions. + + SYNOPSIS + create_full_part_field_array() + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + + RETURN VALUE + TRUE Memory allocation of field array failed + FALSE Ok + + DESCRIPTION + If there is no subpartitioning then the same array is used as for the + partitioning. Otherwise a new array is built up using the flag + FIELD_IN_PART_FUNC in the field object. + This function is called from fix_partition_func +*/ + +static bool create_full_part_field_array(TABLE *table, + partition_info *part_info) +{ + bool result= FALSE; + DBUG_ENTER("create_full_part_field_array"); + + if (!part_info->is_sub_partitioned()) + { + part_info->full_part_field_array= part_info->part_field_array; + part_info->no_full_part_fields= part_info->no_part_fields; + } + else + { + Field **ptr, *field, **field_array; + uint no_part_fields=0, size_field_array; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + no_part_fields++; + } + size_field_array= (no_part_fields+1)*sizeof(Field*); + field_array= (Field**)sql_alloc(size_field_array); + if (unlikely(!field_array)) + { + mem_alloc_error(size_field_array); + result= TRUE; + goto end; + } + no_part_fields= 0; + ptr= table->field; + while ((field= *(ptr++))) + { + if (field->flags & FIELD_IN_PART_FUNC_FLAG) + field_array[no_part_fields++]= field; + } + field_array[no_part_fields]=0; + part_info->full_part_field_array= field_array; + part_info->no_full_part_fields= no_part_fields; + } +end: + DBUG_RETURN(result); +} + + +/* + + Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by + set_indicator_in_key_fields (always used in pairs). + + SYNOPSIS + clear_indicator_in_key_fields() + key_info Reference to find the key fields + + RETURN VALUE + NONE + + DESCRIPTION + These support routines is used to set/reset an indicator of all fields + in a certain key. It is used in conjunction with another support routine + that traverse all fields in the PF to find if all or some fields in the + PF is part of the key. This is used to check primary keys and unique + keys involve all fields in PF (unless supported) and to derive the + key_map's used to quickly decide whether the index can be used to + derive which partitions are needed to scan. +*/ + +static void clear_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags&= (~GET_FIXED_FIELDS_FLAG); +} + + +/* + Set flag GET_FIXED_FIELDS_FLAG in all fields of a key. 
+ + SYNOPSIS + set_indicator_in_key_fields + key_info Reference to find the key fields + + RETURN VALUE + NONE +*/ + +static void set_indicator_in_key_fields(KEY *key_info) +{ + KEY_PART_INFO *key_part; + uint key_parts= key_info->key_parts, i; + for (i= 0, key_part=key_info->key_part; i < key_parts; i++, key_part++) + key_part->field->flags|= GET_FIXED_FIELDS_FLAG; +} + + +/* + Check if all or some fields in partition field array is part of a key + previously used to tag key fields. + + SYNOPSIS + check_fields_in_PF() + ptr Partition field array + out:all_fields Is all fields of partition field array used in key + out:some_fields Is some fields of partition field array used in key + + RETURN VALUE + all_fields, some_fields +*/ + +static void check_fields_in_PF(Field **ptr, bool *all_fields, + bool *some_fields) +{ + DBUG_ENTER("check_fields_in_PF"); + + *all_fields= TRUE; + *some_fields= FALSE; + if ((!ptr) || !(*ptr)) + { + *all_fields= FALSE; + DBUG_VOID_RETURN; + } + do + { + /* Check if the field of the PF is part of the current key investigated */ + if ((*ptr)->flags & GET_FIXED_FIELDS_FLAG) + *some_fields= TRUE; + else + *all_fields= FALSE; + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + +/* + Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table. + This routine is used for error handling purposes. + + SYNOPSIS + clear_field_flag() + table TABLE object for which partition fields are set-up + + RETURN VALUE + NONE +*/ + +static void clear_field_flag(TABLE *table) +{ + Field **ptr; + DBUG_ENTER("clear_field_flag"); + + for (ptr= table->field; *ptr; ptr++) + (*ptr)->flags&= (~GET_FIXED_FIELDS_FLAG); + DBUG_VOID_RETURN; +} + + +/* + find_field_in_table_sef finds the field given its name. All fields get + GET_FIXED_FIELDS_FLAG set. + + SYNOPSIS + handle_list_of_fields() + it A list of field names for the partition function + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + + RETURN VALUE + TRUE Fields in list of fields not part of table + FALSE All fields ok and array created + + DESCRIPTION + This routine sets-up the partition field array for KEY partitioning, it + also verifies that all fields in the list of fields is actually a part of + the table. + +*/ + + +static bool handle_list_of_fields(List_iterator<char> it, + TABLE *table, + partition_info *part_info, + bool is_sub_part) +{ + Field *field; + bool result; + char *field_name; + bool is_list_empty= TRUE; + DBUG_ENTER("handle_list_of_fields"); + + while ((field_name= it++)) + { + is_list_empty= FALSE; + field= find_field_in_table_sef(table, field_name); + if (likely(field != 0)) + field->flags|= GET_FIXED_FIELDS_FLAG; + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + clear_field_flag(table); + result= TRUE; + goto end; + } + } + if (is_list_empty) + { + uint primary_key= table->s->primary_key; + if (primary_key != MAX_KEY) + { + uint no_key_parts= table->key_info[primary_key].key_parts, i; + /* + In the case of an empty list we use primary key as partition key. + */ + for (i= 0; i < no_key_parts; i++) + { + Field *field= table->key_info[primary_key].key_part[i].field; + field->flags|= GET_FIXED_FIELDS_FLAG; + } + } + else + { + if (table->s->db_type->partition_flags && + (table->s->db_type->partition_flags() & HA_USE_AUTO_PARTITION) && + (table->s->db_type->partition_flags() & HA_CAN_PARTITION)) + { + /* + This engine can handle automatic partitioning and there is no + primary key. 
In this case we rely on that the engine handles + partitioning based on a hidden key. Thus we allocate no + array for partitioning fields. + */ + DBUG_RETURN(FALSE); + } + else + { + my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + } + } + result= set_up_field_array(table, is_sub_part); +end: + DBUG_RETURN(result); +} + + +/* + The function uses a new feature in fix_fields where the flag + GET_FIXED_FIELDS_FLAG is set for all fields in the item tree. + This field must always be reset before returning from the function + since it is used for other purposes as well. + + SYNOPSIS + fix_fields_part_func() + thd The thread object + tables A list of one table, the partitioned table + func_expr The item tree reference of the partition function + part_info Reference to partitioning data structure + sub_part Is the table subpartitioned as well + + RETURN VALUE + TRUE An error occurred, something was wrong with the + partition function. + FALSE Ok, a partition field array was created + + DESCRIPTION + This function is used to build an array of partition fields for the + partitioning function and subpartitioning function. The partitioning + function is an item tree that must reference at least one field in the + table. This is checked first in the parser that the function doesn't + contain non-cacheable parts (like a random function) and by checking + here that the function isn't a constant function. + + Calculate the number of fields in the partition function. + Use it allocate memory for array of Field pointers. + Initialise array of field pointers. Use information set when + calling fix_fields and reset it immediately after. + The get_fields_in_item_tree activates setting of bit in flags + on the field object. +*/ + +static bool fix_fields_part_func(THD *thd, TABLE_LIST *tables, + Item* func_expr, partition_info *part_info, + bool is_sub_part) +{ + bool result= TRUE; + TABLE *table= tables->table; + TABLE_LIST *save_table_list, *save_first_table, *save_last_table; + int error; + Name_resolution_context *context; + const char *save_where; + DBUG_ENTER("fix_fields_part_func"); + + context= thd->lex->current_context(); + table->map= 1; //To ensure correct calculation of const item + table->get_fields_in_item_tree= TRUE; + save_table_list= context->table_list; + save_first_table= context->first_name_resolution_table; + save_last_table= context->last_name_resolution_table; + context->table_list= tables; + context->first_name_resolution_table= tables; + context->last_name_resolution_table= NULL; + func_expr->walk(&Item::change_context_processor, (byte*) context); + save_where= thd->where; + thd->where= "partition function"; + error= func_expr->fix_fields(thd, (Item**)0); + context->table_list= save_table_list; + context->first_name_resolution_table= save_first_table; + context->last_name_resolution_table= save_last_table; + if (unlikely(error)) + { + DBUG_PRINT("info", ("Field in partition function not part of table")); + clear_field_flag(table); + goto end; + } + thd->where= save_where; + if (unlikely(func_expr->const_item())) + { + my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0)); + clear_field_flag(table); + goto end; + } + result= set_up_field_array(table, is_sub_part); +end: + table->get_fields_in_item_tree= FALSE; + table->map= 0; //Restore old value + DBUG_RETURN(result); +} + + +/* + Check that the primary key contains all partition fields if defined + + SYNOPSIS + check_primary_key() + table TABLE object for which partition fields are set-up + + RETURN VALUES + 
TRUE Not all fields in partitioning function was part + of primary key + FALSE Ok, all fields of partitioning function were part + of primary key + + DESCRIPTION + This function verifies that if there is a primary key that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. +*/ + +static bool check_primary_key(TABLE *table) +{ + uint primary_key= table->s->primary_key; + bool all_fields, some_fields; + bool result= FALSE; + DBUG_ENTER("check_primary_key"); + + if (primary_key < MAX_KEY) + { + set_indicator_in_key_fields(table->key_info+primary_key); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+primary_key); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY"); + result= TRUE; + } + } + DBUG_RETURN(result); +} + + +/* + Check that unique keys contains all partition fields + + SYNOPSIS + check_unique_keys() + table TABLE object for which partition fields are set-up + + RETURN VALUES + TRUE Not all fields in partitioning function was part + of all unique keys + FALSE Ok, all fields of partitioning function were part + of unique keys + + DESCRIPTION + This function verifies that if there is a unique index that it contains + all the fields of the partition function. + This is a temporary limitation that will hopefully be removed after a + while. +*/ + +static bool check_unique_keys(TABLE *table) +{ + bool all_fields, some_fields; + bool result= FALSE; + uint keys= table->s->keys; + uint i; + DBUG_ENTER("check_unique_keys"); + + for (i= 0; i < keys; i++) + { + if (table->key_info[i].flags & HA_NOSAME) //Unique index + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(table->part_info->full_part_field_array, + &all_fields, &some_fields); + clear_indicator_in_key_fields(table->key_info+i); + if (unlikely(!all_fields)) + { + my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX"); + result= TRUE; + break; + } + } + } + DBUG_RETURN(result); +} + + +/* + An important optimisation is whether a range on a field can select a subset + of the partitions. + A prerequisite for this to happen is that the PF is a growing function OR + a shrinking function. + This can never happen for a multi-dimensional PF. Thus this can only happen + with PF with at most one field involved in the PF. + The idea is that if the function is a growing function and you know that + the field of the PF is 4 <= A <= 6 then we can convert this to a range + in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the + case of RANGE PARTITIONING and LIST PARTITIONING this can be used to + calculate a set of partitions rather than scanning all of them. + Thus the following prerequisites are there to check if sets of partitions + can be found. 
+ 1) Only possible for RANGE and LIST partitioning (not for subpartitioning) + 2) Only possible if PF only contains 1 field + 3) Possible if PF is a growing function of the field + 4) Possible if PF is a shrinking function of the field + OBSERVATION: + 1) IF f1(A) is a growing function AND f2(A) is a growing function THEN + f1(A) + f2(A) is a growing function + f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0 + 2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN + f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0 + 3) IF A is a growing function then a function f(A) that removes the + least significant portion of A is a growing function + E.g. DATE(datetime) is a growing function + MONTH(datetime) is not a growing/shrinking function + 4) IF f1(A) is a growing function and f2(A) is a growing function THEN + f1(f2(A)) and f2(f1(A)) are also growing functions + 5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN + f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function + 6) f1(A) = A is a growing function + 7) f1(A) = A*a + b (where a and b are constants) is a growing function + + By analysing the item tree of the PF we can use these deducements and + derive whether the PF is a growing function or a shrinking function or + neither of it. + + If the PF is range capable then a flag is set on the table object + indicating this to notify that we can use also ranges on the field + of the PF to deduce a set of partitions if the fields of the PF were + not all fully bound. + + SYNOPSIS + check_range_capable_PF() + table TABLE object for which partition fields are set-up + + DESCRIPTION + Support for this is not implemented yet. +*/ + +void check_range_capable_PF(TABLE *table) +{ + DBUG_ENTER("check_range_capable_PF"); + + DBUG_VOID_RETURN; +} + + +/* + Set up partition bitmap + + SYNOPSIS + set_up_partition_bitmap() + thd Thread object + part_info Reference to partitioning data structure + + RETURN VALUE + TRUE Memory allocation failure + FALSE Success + + DESCRIPTION + Allocate memory for bitmap of the partitioned table + and initialise it. +*/ + +static bool set_up_partition_bitmap(THD *thd, partition_info *part_info) +{ + uint32 *bitmap_buf; + uint bitmap_bits= part_info->no_subparts? + (part_info->no_subparts* part_info->no_parts): + part_info->no_parts; + uint bitmap_bytes= bitmap_buffer_size(bitmap_bits); + DBUG_ENTER("set_up_partition_bitmap"); + + if (!(bitmap_buf= (uint32*)thd->alloc(bitmap_bytes))) + { + mem_alloc_error(bitmap_bytes); + DBUG_RETURN(TRUE); + } + bitmap_init(&part_info->used_partitions, bitmap_buf, bitmap_bytes*8, FALSE); + bitmap_set_all(&part_info->used_partitions); + DBUG_RETURN(FALSE); +} + + +/* + Set up partition key maps + + SYNOPSIS + set_up_partition_key_maps() + table TABLE object for which partition fields are set-up + part_info Reference to partitioning data structure + + RETURN VALUES + None + + DESCRIPTION + This function sets up a couple of key maps to be able to quickly check + if an index ever can be used to deduce the partition fields or even + a part of the fields of the partition function. + We set up the following key_map's. 
+ PF = Partition Function + 1) All fields of the PF is set even by equal on the first fields in the + key + 2) All fields of the PF is set if all fields of the key is set + 3) At least one field in the PF is set if all fields is set + 4) At least one field in the PF is part of the key +*/ + +static void set_up_partition_key_maps(TABLE *table, + partition_info *part_info) +{ + uint keys= table->s->keys; + uint i; + bool all_fields, some_fields; + DBUG_ENTER("set_up_partition_key_maps"); + + part_info->all_fields_in_PF.clear_all(); + part_info->all_fields_in_PPF.clear_all(); + part_info->all_fields_in_SPF.clear_all(); + part_info->some_fields_in_PF.clear_all(); + for (i= 0; i < keys; i++) + { + set_indicator_in_key_fields(table->key_info+i); + check_fields_in_PF(part_info->full_part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PF.set_bit(i); + if (some_fields) + part_info->some_fields_in_PF.set_bit(i); + if (part_info->is_sub_partitioned()) + { + check_fields_in_PF(part_info->part_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_PPF.set_bit(i); + check_fields_in_PF(part_info->subpart_field_array, + &all_fields, &some_fields); + if (all_fields) + part_info->all_fields_in_SPF.set_bit(i); + } + clear_indicator_in_key_fields(table->key_info+i); + } + DBUG_VOID_RETURN; +} + + +/* + Set up function pointers for partition function + + SYNOPSIS + set_up_partition_func_pointers() + part_info Reference to partitioning data structure + + RETURN VALUE + NONE + + DESCRIPTION + Set-up all function pointers for calculation of partition id, + subpartition id and the upper part in subpartitioning. This is to speed up + execution of get_partition_id which is executed once every record to be + written and deleted and twice for updates. 
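set_up_partition_func_pointers, whose body follows, reduces all of these choices to a one-time binding so that the per-row path is a single indirect call. A standalone model of the pattern with only one variant filled in; every name here is invented.

struct part_model_t;
typedef int (*get_part_id_func)(const struct part_model_t*, long long,
                                unsigned*);

struct part_model_t
{
  unsigned no_parts;
  get_part_id_func get_partition_id;   /* bound once at open time */
};

static int model_hash_nosub(const struct part_model_t *p, long long v,
                            unsigned *id)
{
  *id= (unsigned) (((unsigned long long) v) % p->no_parts);
  return 0;
}

static void model_set_up_pointers(struct part_model_t *p)
{
  /* The real routine picks among the RANGE/LIST x HASH/KEY x LINEAR x
     subpartitioned variants; plain HASH without subpartitions is shown. */
  p->get_partition_id= model_hash_nosub;
}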
+*/ + +static void set_up_partition_func_pointers(partition_info *part_info) +{ + DBUG_ENTER("set_up_partition_func_pointers"); + + if (part_info->is_sub_partitioned()) + { + if (part_info->part_type == RANGE_PARTITION) + { + part_info->get_part_partition_id= get_partition_id_range; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_range_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_range_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + else /* LIST Partitioning */ + { + part_info->get_part_partition_id= get_partition_id_list; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_key; + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_key; + part_info->get_subpartition_id= get_partition_id_key_sub; + } + } + else + { + if (part_info->linear_hash_ind) + { + part_info->get_partition_id= get_partition_id_list_sub_linear_hash; + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + } + else + { + part_info->get_partition_id= get_partition_id_list_sub_hash; + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } + } + } + else /* No subpartitioning */ + { + part_info->get_part_partition_id= NULL; + part_info->get_subpartition_id= NULL; + if (part_info->part_type == RANGE_PARTITION) + part_info->get_partition_id= get_partition_id_range; + else if (part_info->part_type == LIST_PARTITION) + part_info->get_partition_id= get_partition_id_list; + else /* HASH partitioning */ + { + if (part_info->list_of_part_fields) + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_key_nosub; + else + part_info->get_partition_id= get_partition_id_key_nosub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_partition_id= get_partition_id_linear_hash_nosub; + else + part_info->get_partition_id= get_partition_id_hash_nosub; + } + } + } + DBUG_VOID_RETURN; +} + + +/* + For linear hashing we need a mask which is on the form 2**n - 1 where + 2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7. + + SYNOPSIS + set_linear_hash_mask() + part_info Reference to partitioning data structure + no_parts Number of parts in linear hash partitioning + + RETURN VALUE + NONE +*/ + +static void set_linear_hash_mask(partition_info *part_info, uint no_parts) +{ + uint mask; + + for (mask= 1; mask < no_parts; mask<<=1) + ; + part_info->linear_hash_mask= mask - 1; +} + + +/* + This function calculates the partition id provided the result of the hash + function using linear hashing parameters, mask and number of partitions. 
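A standalone restatement of set_linear_hash_mask above and of the routine documented here, with the arithmetic worked through: six partitions give mask 7; hash value 6 yields 6 & 7 = 6, which is out of range, so it folds back through the halved mask 3 to partition 2.

#include <assert.h>

static unsigned linear_hash_mask_model(unsigned no_parts)
{
  unsigned mask;
  for (mask= 1; mask < no_parts; mask<<= 1)
    ;
  return mask - 1;                     /* 2**n - 1 with 2**n >= no_parts */
}

static unsigned linear_hash_part_id_model(long long hash_value,
                                          unsigned mask, unsigned no_parts)
{
  unsigned part_id= (unsigned) (hash_value & mask);
  if (part_id >= no_parts)             /* fold back into existing parts */
    part_id= (unsigned) (hash_value & (((mask + 1) >> 1) - 1));
  return part_id;
}

int main()
{
  unsigned mask= linear_hash_mask_model(6);            /* mask == 7 */
  assert(mask == 7);
  assert(linear_hash_part_id_model(5, mask, 6) == 5);  /* in range  */
  assert(linear_hash_part_id_model(6, mask, 6) == 2);  /* 6 & 3 = 2 */
  return 0;
}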
+ + SYNOPSIS + get_part_id_from_linear_hash() + hash_value Hash value calculated by HASH function or KEY function + mask Mask calculated previously by set_linear_hash_mask + no_parts Number of partitions in HASH partitioned part + + RETURN VALUE + part_id The calculated partition identity (starting at 0) + + DESCRIPTION + The partition is calculated according to the theory of linear hashing. + See e.g. Linear hashing: a new tool for file and table addressing, + Reprinted from VLDB-80 in Readings in Database Systems, 2nd ed, + M. Stonebraker (ed.), Morgan Kaufmann 1994. +*/ + +static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask, + uint no_parts) +{ + uint32 part_id= (uint32)(hash_value & mask); + + if (part_id >= no_parts) + { + uint new_mask= ((mask + 1) >> 1) - 1; + part_id= (uint32)(hash_value & new_mask); + } + return part_id; +} + +/* + Fix partition functions + + SYNOPSIS + fix_partition_func() + thd The thread object + name The name of the partitioned table + table TABLE object for which partition fields are set-up + is_create_table_ind Indicator of whether openfrm was called as part of + CREATE or ALTER TABLE + + RETURN VALUE + TRUE Error + FALSE Success + + DESCRIPTION + The name parameter contains the full table name and is used to get the + database name of the table which is used to set-up a correct + TABLE_LIST object for use in fix_fields. + + NOTES + This function is called as part of opening the table, by reading the .frm + file. Doing this is part of CREATE TABLE, so it is quite permissible that + errors due to erroneous syntax aren't found until we come here. Use of a + non-existing field in the table is one such example of an error that is + not discovered until this point. +*/ + +bool fix_partition_func(THD *thd, const char* name, TABLE *table, + bool is_create_table_ind) +{ + bool result= TRUE; + uint dir_length, home_dir_length; + TABLE_LIST tables; + TABLE_SHARE *share= table->s; + char db_name_string[FN_REFLEN]; + char* db_name; + partition_info *part_info= table->part_info; + ulong save_set_query_id= thd->set_query_id; + DBUG_ENTER("fix_partition_func"); + + if (part_info->fixed) + { + DBUG_RETURN(FALSE); + } + thd->set_query_id= 0; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); + /* + Set up the TABLE_LIST object to be a list with a single table. Zero the + object to create NULL pointers, set alias and real name to the table + name, and get the database name from the file name. + */ + + bzero((void*)&tables, sizeof(TABLE_LIST)); + tables.alias= tables.table_name= (char*) share->table_name.str; + tables.table= table; + tables.next_local= 0; + tables.next_name_resolution_table= 0; + strmov(db_name_string, name); + dir_length= dirname_length(db_name_string); + db_name_string[dir_length - 1]= 0; + home_dir_length= dirname_length(db_name_string); + db_name= &db_name_string[home_dir_length]; + tables.db= db_name; + + if (!is_create_table_ind) + { + if (partition_default_handling(table, part_info, + table->s->normalized_path.str)) + { + DBUG_RETURN(TRUE); + } + } + if (part_info->is_sub_partitioned()) + { + DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION); + /* + Subpartition is defined. We need to verify that the subpartitioning + function is correct.
+ */ + if (part_info->linear_hash_ind) + set_linear_hash_mask(part_info, part_info->no_subparts); + if (part_info->list_of_subpart_fields) + { + List_iterator<char> it(part_info->subpart_field_list); + if (unlikely(handle_list_of_fields(it, table, part_info, TRUE))) + goto end; + } + else + { + if (unlikely(fix_fields_part_func(thd, &tables, + part_info->subpart_expr, part_info, + TRUE))) + goto end; + if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT)) + { + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), + "SUBPARTITION"); + goto end; + } + } + } + DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION); + /* + Partition is defined. We need to verify that partitioning + function is correct. + */ + if (part_info->part_type == HASH_PARTITION) + { + if (part_info->linear_hash_ind) + set_linear_hash_mask(part_info, part_info->no_parts); + if (part_info->list_of_part_fields) + { + List_iterator<char> it(part_info->part_field_list); + if (unlikely(handle_list_of_fields(it, table, part_info, FALSE))) + goto end; + } + else + { + if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr, + part_info, FALSE))) + goto end; + if (unlikely(part_info->part_expr->result_type() != INT_RESULT)) + { + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str); + goto end; + } + part_info->part_result_type= INT_RESULT; + } + } + else + { + const char *error_str; + if (part_info->part_type == RANGE_PARTITION) + { + error_str= partition_keywords[PKW_RANGE].str; + if (unlikely(check_range_constants(part_info))) + goto end; + } + else if (part_info->part_type == LIST_PARTITION) + { + error_str= partition_keywords[PKW_LIST].str; + if (unlikely(check_list_constants(part_info))) + goto end; + } + else + { + DBUG_ASSERT(0); + my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0)); + goto end; + } + if (unlikely(part_info->no_parts < 1)) + { + my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str); + goto end; + } + if (unlikely(fix_fields_part_func(thd, &tables, part_info->part_expr, + part_info, FALSE))) + goto end; + if (unlikely(part_info->part_expr->result_type() != INT_RESULT)) + { + my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str); + goto end; + } + } + if (unlikely(create_full_part_field_array(table, part_info))) + goto end; + if (unlikely(check_primary_key(table))) + goto end; + if (unlikely((!(table->s->db_type->partition_flags && + (table->s->db_type->partition_flags() & HA_CAN_PARTITION_UNIQUE))) && + check_unique_keys(table))) + goto end; + if (unlikely(set_up_partition_bitmap(thd, part_info))) + goto end; + check_range_capable_PF(table); + set_up_partition_key_maps(table, part_info); + set_up_partition_func_pointers(part_info); + part_info->fixed= TRUE; + set_up_range_analysis_info(part_info); + result= FALSE; +end: + thd->set_query_id= save_set_query_id; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); + DBUG_RETURN(result); +} + + +/* + The code below is support routines for the reverse parsing of the + partitioning syntax. This feature is very useful to generate syntax for + all default values to avoid all default checking when opening the frm + file. It is also used when altering the partitioning by use of various + ALTER TABLE commands. Finally it is used for SHOW CREATE TABLES. 
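A hedged usage sketch of the emitters defined just below (the sketch itself is not part of the changeset): each add_* call returns 0 on success, so err accumulates the number of failed writes and one test at the end suffices. With fptr an open File this appends "PARTITION BY HASH (a)" to the temporary syntax file.

static int example_emit_partition_by_hash(File fptr)
{
  int err= add_partition_by(fptr);     /* "PARTITION BY "       */
  err+= add_hash(fptr);                /* "HASH ("              */
  err+= add_string(fptr, "a");         /* partition expression  */
  err+= add_end_parenthesis(fptr);     /* ")"                   */
  return err;                          /* 0 only if all writes succeeded */
}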
+*/ + +static int add_write(File fptr, const char *buf, uint len) +{ + uint len_written= my_write(fptr, (const byte*)buf, len, MYF(0)); + + if (likely(len == len_written)) + return 0; + else + return 1; +} + +static int add_string(File fptr, const char *string) +{ + return add_write(fptr, string, strlen(string)); +} + +static int add_string_len(File fptr, const char *string, uint len) +{ + return add_write(fptr, string, len); +} + +static int add_space(File fptr) +{ + return add_string(fptr, space_str); +} + +static int add_comma(File fptr) +{ + return add_string(fptr, comma_str); +} + +static int add_equal(File fptr) +{ + return add_string(fptr, equal_str); +} + +static int add_end_parenthesis(File fptr) +{ + return add_string(fptr, end_paren_str); +} + +static int add_begin_parenthesis(File fptr) +{ + return add_string(fptr, begin_paren_str); +} + +static int add_part_key_word(File fptr, const char *key_string) +{ + int err= add_string(fptr, key_string); + + err+= add_space(fptr); + return err + add_begin_parenthesis(fptr); +} + +static int add_hash(File fptr) +{ + return add_part_key_word(fptr, partition_keywords[PKW_HASH].str); +} + +static int add_partition(File fptr) +{ + strxmov(buff, part_str, space_str, NullS); + return add_string(fptr, buff); +} + +static int add_subpartition(File fptr) +{ + int err= add_string(fptr, sub_str); + + return err + add_partition(fptr); +} + +static int add_partition_by(File fptr) +{ + strxmov(buff, part_str, space_str, by_str, space_str, NullS); + return add_string(fptr, buff); +} + +static int add_subpartition_by(File fptr) +{ + int err= add_string(fptr, sub_str); + + return err + add_partition_by(fptr); +} + +static int add_key_partition(File fptr, List<char> field_list) +{ + uint i, no_fields; + int err; + + List_iterator<char> part_it(field_list); + err= add_part_key_word(fptr, partition_keywords[PKW_KEY].str); + no_fields= field_list.elements; + i= 0; + while (i < no_fields) + { + const char *field_str= part_it++; + err+= add_string(fptr, field_str); + if (i != (no_fields-1)) + err+= add_comma(fptr); + i++; + } + return err; +} + +static int add_int(File fptr, longlong number) +{ + llstr(number, buff); + return add_string(fptr, buff); +} + +static int add_keyword_string(File fptr, const char *keyword, + bool should_use_quotes, + const char *keystr) +{ + int err= add_string(fptr, keyword); + + err+= add_space(fptr); + err+= add_equal(fptr); + err+= add_space(fptr); + if (should_use_quotes) + err+= add_string(fptr, "'"); + err+= add_string(fptr, keystr); + if (should_use_quotes) + err+= add_string(fptr, "'"); + return err + add_space(fptr); +} + +static int add_keyword_int(File fptr, const char *keyword, longlong num) +{ + int err= add_string(fptr, keyword); + + err+= add_space(fptr); + err+= add_equal(fptr); + err+= add_space(fptr); + err+= add_int(fptr, num); + return err + add_space(fptr); +} + +static int add_engine(File fptr, handlerton *engine_type) +{ + const char *engine_str= engine_type->name; + DBUG_PRINT("info", ("ENGINE = %s", engine_str)); + int err= add_string(fptr, "ENGINE = "); + return err + add_string(fptr, engine_str); +} + +static int add_partition_options(File fptr, partition_element *p_elem) +{ + int err= 0; + + if (p_elem->tablespace_name) + err+= add_keyword_string(fptr,"TABLESPACE", FALSE, + p_elem->tablespace_name); + if (p_elem->nodegroup_id != UNDEF_NODEGROUP) + err+= add_keyword_int(fptr,"NODEGROUP",(longlong)p_elem->nodegroup_id); + if (p_elem->part_max_rows) + err+= 
add_keyword_int(fptr,"MAX_ROWS",(longlong)p_elem->part_max_rows); + if (p_elem->part_min_rows) + err+= add_keyword_int(fptr,"MIN_ROWS",(longlong)p_elem->part_min_rows); + if (p_elem->data_file_name) + err+= add_keyword_string(fptr, "DATA DIRECTORY", TRUE, + p_elem->data_file_name); + if (p_elem->index_file_name) + err+= add_keyword_string(fptr, "INDEX DIRECTORY", TRUE, + p_elem->index_file_name); + if (p_elem->part_comment) + err+= add_keyword_string(fptr, "COMMENT", FALSE, p_elem->part_comment); + return err + add_engine(fptr,p_elem->engine_type); +} + +static int add_partition_values(File fptr, partition_info *part_info, + partition_element *p_elem) +{ + int err= 0; + + if (part_info->part_type == RANGE_PARTITION) + { + err+= add_string(fptr, "VALUES LESS THAN "); + if (p_elem->range_value != LONGLONG_MAX) + { + err+= add_begin_parenthesis(fptr); + err+= add_int(fptr, p_elem->range_value); + err+= add_end_parenthesis(fptr); + } + else + err+= add_string(fptr, partition_keywords[PKW_MAXVALUE].str); + } + else if (part_info->part_type == LIST_PARTITION) + { + uint i; + List_iterator<longlong> list_val_it(p_elem->list_val_list); + err+= add_string(fptr, "VALUES IN "); + uint no_items= p_elem->list_val_list.elements; + err+= add_begin_parenthesis(fptr); + i= 0; + do + { + longlong *list_value= list_val_it++; + err+= add_int(fptr, *list_value); + if (i != (no_items-1)) + err+= add_comma(fptr); + } while (++i < no_items); + err+= add_end_parenthesis(fptr); + } + return err + add_space(fptr); +} + +/* + Generate the partition syntax from the partition data structure. + Useful for support of generating defaults, SHOW CREATE TABLES + and easy partition management. + + SYNOPSIS + generate_partition_syntax() + part_info The partitioning data structure + buf_length A pointer to the returned buffer length + use_sql_alloc Allocate buffer from sql_alloc if true + otherwise use my_malloc + write_all Write everything, also default values + + RETURN VALUES + NULL error + buf, buf_length Buffer and its length + + DESCRIPTION + Here we will generate the full syntax for the given command where all + defaults have been expanded. By so doing the it is also possible to + make lots of checks of correctness while at it. + This could will also be reused for SHOW CREATE TABLES and also for all + type ALTER TABLE commands focusing on changing the PARTITION structure + in any fashion. + + The implementation writes the syntax to a temporary file (essentially + an abstraction of a dynamic array) and if all writes goes well it + allocates a buffer and writes the syntax into this one and returns it. + + As a security precaution the file is deleted before writing into it. This + means that no other processes on the machine can open and read the file + while this processing is ongoing. + + The code is optimised for minimal code size since it is not used in any + common queries. 
+*/
+
+char *generate_partition_syntax(partition_info *part_info,
+                                uint *buf_length,
+                                bool use_sql_alloc,
+                                bool write_all)
+{
+  uint i,j, tot_no_parts, no_subparts, no_parts;
+  partition_element *part_elem;
+  partition_element *save_part_elem= NULL;
+  ulonglong buffer_length;
+  char path[FN_REFLEN];
+  int err= 0;
+  List_iterator<partition_element> part_it(part_info->partitions);
+  List_iterator<partition_element> temp_it(part_info->temp_partitions);
+  File fptr;
+  char *buf= NULL; //Return buffer
+  uint use_temp= 0;
+  uint no_temp_parts= part_info->temp_partitions.elements;
+  bool write_part_state;
+  DBUG_ENTER("generate_partition_syntax");
+
+  write_part_state= (part_info->part_state && !part_info->part_state_len);
+  if (unlikely(((fptr= create_temp_file(path,mysql_tmpdir,"psy",
+                                        O_RDWR | O_BINARY | O_TRUNC |
+                                        O_TEMPORARY, MYF(MY_WME)))) < 0))
+    DBUG_RETURN(NULL);
+#ifndef __WIN__
+  unlink(path);
+#endif
+  err+= add_space(fptr);
+  err+= add_partition_by(fptr);
+  switch (part_info->part_type)
+  {
+    case RANGE_PARTITION:
+      err+= add_part_key_word(fptr, partition_keywords[PKW_RANGE].str);
+      break;
+    case LIST_PARTITION:
+      err+= add_part_key_word(fptr, partition_keywords[PKW_LIST].str);
+      break;
+    case HASH_PARTITION:
+      if (part_info->linear_hash_ind)
+        err+= add_string(fptr, partition_keywords[PKW_LINEAR].str);
+      if (part_info->list_of_part_fields)
+        err+= add_key_partition(fptr, part_info->part_field_list);
+      else
+        err+= add_hash(fptr);
+      break;
+    default:
+      DBUG_ASSERT(0);
+      /* We really shouldn't get here, no use in continuing from here */
+      current_thd->fatal_error();
+      DBUG_RETURN(NULL);
+  }
+  if (part_info->part_expr)
+    err+= add_string_len(fptr, part_info->part_func_string,
+                         part_info->part_func_len);
+  err+= add_end_parenthesis(fptr);
+  err+= add_space(fptr);
+  if ((!part_info->use_default_no_partitions) &&
+      part_info->use_default_partitions)
+  {
+    err+= add_string(fptr, "PARTITIONS ");
+    err+= add_int(fptr, part_info->no_parts);
+    err+= add_space(fptr);
+  }
+  if (part_info->is_sub_partitioned())
+  {
+    err+= add_subpartition_by(fptr);
+    /* Must be hash partitioning for subpartitioning */
+    if (part_info->list_of_subpart_fields)
+      err+= add_key_partition(fptr, part_info->subpart_field_list);
+    else
+      err+= add_hash(fptr);
+    if (part_info->subpart_expr)
+      err+= add_string_len(fptr, part_info->subpart_func_string,
+                           part_info->subpart_func_len);
+    err+= add_end_parenthesis(fptr);
+    err+= add_space(fptr);
+    if ((!part_info->use_default_no_subpartitions) &&
+        part_info->use_default_subpartitions)
+    {
+      err+= add_string(fptr, "SUBPARTITIONS ");
+      err+= add_int(fptr, part_info->no_subparts);
+      err+= add_space(fptr);
+    }
+  }
+  no_parts= part_info->no_parts;
+  tot_no_parts= no_parts + no_temp_parts;
+  no_subparts= part_info->no_subparts;
+
+  if (write_all || (!part_info->use_default_partitions))
+  {
+    err+= add_begin_parenthesis(fptr);
+    i= 0;
+    do
+    {
+      /*
+        We need to do some clever list manipulation here since the list
+        serves two different needs, and here we take on some of the cost
+        of that so that simpler list processing can be used in the other
+        parts of the code.
+
+        For ALTER TABLE REORGANIZE PARTITIONS the main list holds the
+        final list of partitions, while the reorganised partitions are
+        kept in the temporary partition list. Thus when finding the first
+        added part we insert the temporary list if there is such a list.
+        If there is no temporary list we are performing an ADD PARTITION.
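+
+        As a sketch, assume p1 and p2 are reorganised into r0 and r1: the
+        main list is then (p0, r0, r1) and the temporary list (p1, p2).
+        When we reach r0 (state PART_TO_BE_ADDED) we write p1 and p2 from
+        the temporary list first, then r0 and r1, so the reorganised
+        partitions appear in the generated syntax before their
+        replacements.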
+      */
+      if (use_temp && use_temp <= no_temp_parts)
+      {
+        part_elem= temp_it++;
+        DBUG_ASSERT(no_temp_parts);
+        no_temp_parts--;
+      }
+      else if (use_temp)
+      {
+        DBUG_ASSERT(no_parts);
+        part_elem= save_part_elem;
+        use_temp= 0;
+        no_parts--;
+      }
+      else
+      {
+        part_elem= part_it++;
+        if ((part_elem->part_state == PART_TO_BE_ADDED ||
+             part_elem->part_state == PART_IS_ADDED) && no_temp_parts)
+        {
+          save_part_elem= part_elem;
+          part_elem= temp_it++;
+          no_temp_parts--;
+          use_temp= 1;
+        }
+        else
+        {
+          DBUG_ASSERT(no_parts);
+          no_parts--;
+        }
+      }
+
+      if (part_elem->part_state != PART_IS_DROPPED)
+      {
+        if (write_part_state)
+        {
+          uint32 part_state_id= part_info->part_state_len;
+          part_info->part_state[part_state_id]= (uchar)part_elem->part_state;
+          part_info->part_state_len= part_state_id+1;
+        }
+        err+= add_partition(fptr);
+        err+= add_string(fptr, part_elem->partition_name);
+        err+= add_space(fptr);
+        err+= add_partition_values(fptr, part_info, part_elem);
+        if (!part_info->is_sub_partitioned())
+          err+= add_partition_options(fptr, part_elem);
+        if (part_info->is_sub_partitioned() &&
+            (write_all || (!part_info->use_default_subpartitions)))
+        {
+          err+= add_space(fptr);
+          err+= add_begin_parenthesis(fptr);
+          List_iterator<partition_element> sub_it(part_elem->subpartitions);
+          j= 0;
+          do
+          {
+            part_elem= sub_it++;
+            err+= add_subpartition(fptr);
+            err+= add_string(fptr, part_elem->partition_name);
+            err+= add_space(fptr);
+            err+= add_partition_options(fptr, part_elem);
+            if (j != (no_subparts-1))
+            {
+              err+= add_comma(fptr);
+              err+= add_space(fptr);
+            }
+            else
+              err+= add_end_parenthesis(fptr);
+          } while (++j < no_subparts);
+        }
+        if (i != (tot_no_parts-1))
+        {
+          err+= add_comma(fptr);
+          err+= add_space(fptr);
+        }
+      }
+      if (i == (tot_no_parts-1))
+        err+= add_end_parenthesis(fptr);
+    } while (++i < tot_no_parts);
+    DBUG_ASSERT(!no_parts && !no_temp_parts);
+  }
+  if (err)
+    goto close_file;
+  buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0));
+  if (unlikely(buffer_length == MY_FILEPOS_ERROR))
+    goto close_file;
+  if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR))
+    goto close_file;
+  *buf_length= (uint)buffer_length;
+  if (use_sql_alloc)
+    buf= sql_alloc(*buf_length+1);
+  else
+    buf= my_malloc(*buf_length+1, MYF(MY_WME));
+  if (!buf)
+    goto close_file;
+
+  if (unlikely(my_read(fptr, (byte*)buf, *buf_length, MYF(MY_FNABP))))
+  {
+    if (!use_sql_alloc)
+      my_free(buf, MYF(0));
+    else
+      buf= NULL;
+  }
+  else
+    buf[*buf_length]= 0;
+
+close_file:
+  my_close(fptr, MYF(0));
+  DBUG_RETURN(buf);
+}
+
+
+/*
+  Check if partition key fields are modified and if it can be handled by
+  the underlying storage engine.
+
+  SYNOPSIS
+    partition_key_modified
+    table                TABLE object for which partition fields are set-up
+    fields               A list of the fields to be modified
+
+  RETURN VALUES
+    TRUE                 Need special handling of UPDATE
+    FALSE                Normal UPDATE handling is ok
+*/
+
+bool partition_key_modified(TABLE *table, List<Item> &fields)
+{
+  List_iterator_fast<Item> f(fields);
+  partition_info *part_info= table->part_info;
+  Item_field *item_field;
+  DBUG_ENTER("partition_key_modified");
+
+  if (!part_info)
+    DBUG_RETURN(FALSE);
+  if (table->s->db_type->partition_flags &&
+      (table->s->db_type->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY))
+    DBUG_RETURN(FALSE);
+  f.rewind();
+  while ((item_field=(Item_field*) f++))
+    if (item_field->field->flags & FIELD_IN_PART_FUNC_FLAG)
+      DBUG_RETURN(TRUE);
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  A function to ensure correct handling of NULL values in partition
+  functions.
+  SYNOPSIS
+    part_val_int()
+    item_expr           The item expression to evaluate
+
+  RETURN VALUES
+    The value of the partition function, LONGLONG_MIN if any null value
+    in function
+*/
+
+static
+inline
+longlong
+part_val_int(Item *item_expr)
+{
+  longlong value= item_expr->val_int();
+  if (item_expr->null_value)
+    value= LONGLONG_MIN;
+  return value;
+}
+
+
+/*
+  The next set of functions are used to calculate the partition identity.
+  A handler sets up a variable that corresponds to one of these functions
+  to be able to quickly call it whenever the partition id needs to be
+  calculated based on the record in table->record[0] (or set up to fake
+  that). There are 4 functions for hash partitioning and 2 for RANGE/LIST
+  partitions. In addition there are 4 variants for RANGE subpartitioning
+  and 4 variants for LIST subpartitioning, thus in total there are 14
+  variants of this function.
+
+  We have a set of support functions for these 14 variants. There are 4
+  variants of hash functions and there is a function for each. The KEY
+  partitioning uses the function calculate_key_value to calculate the hash
+  value based on an array of fields. The linear hash variants use the
+  method get_part_id_from_linear_hash to get the partition id using the
+  hash value and some parameters calculated from the number of partitions.
+*/
+
+/*
+  Calculate hash value for KEY partitioning using an array of fields.
+
+  SYNOPSIS
+    calculate_key_value()
+    field_array         An array of the fields in KEY partitioning
+
+  RETURN VALUE
+    hash_value calculated
+
+  DESCRIPTION
+    Uses the hash function on the character set of the field. Integer and
+    floating point fields use the binary character set by default.
+*/
+
+static uint32 calculate_key_value(Field **field_array)
+{
+  uint32 hashnr= 0;
+  ulong nr2= 4;
+
+  do
+  {
+    Field *field= *field_array;
+    if (field->is_null())
+    {
+      hashnr^= (hashnr << 1) | 1;
+    }
+    else
+    {
+      uint len= field->pack_length();
+      ulong nr1= 1;
+      CHARSET_INFO *cs= field->charset();
+      cs->coll->hash_sort(cs, (uchar*)field->ptr, len, &nr1, &nr2);
+      hashnr^= (uint32)nr1;
+    }
+  } while (*(++field_array));
+  return hashnr;
+}
+
+
+/*
+  A simple support function to calculate part_id given local part and
+  sub part.
+
+  SYNOPSIS
+    get_part_id_for_sub()
+    loc_part_id         Local partition id
+    sub_part_id         Subpartition id
+    no_subparts         Number of subparts
+*/
+
+inline
+static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
+                                  uint no_subparts)
+{
+  return (uint32)((loc_part_id * no_subparts) + sub_part_id);
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY HASH
+
+  SYNOPSIS
+    get_part_id_hash()
+    no_parts            Number of hash partitions
+    part_expr           Item tree of hash function
+    out:func_value      Value of hash function
+
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_hash(uint no_parts,
+                               Item *part_expr,
+                               longlong *func_value)
+{
+  DBUG_ENTER("get_part_id_hash");
+  *func_value= part_val_int(part_expr);
+  longlong int_hash_id= *func_value % no_parts;
+  DBUG_RETURN(int_hash_id < 0 ? -int_hash_id : int_hash_id);
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY LINEAR HASH
+
+  SYNOPSIS
+    get_part_id_linear_hash()
+    part_info           A reference to the partition_info struct where all
+                        the desired information is given
+    no_parts            Number of hash partitions
+    part_expr           Item tree of hash function
+    out:func_value      Value of hash function
+
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_hash(partition_info *part_info,
+                                      uint no_parts,
+                                      Item *part_expr,
+                                      longlong *func_value)
+{
+  DBUG_ENTER("get_part_id_linear_hash");
+
+  *func_value= part_val_int(part_expr);
+  DBUG_RETURN(get_part_id_from_linear_hash(*func_value,
+                                           part_info->linear_hash_mask,
+                                           no_parts));
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY KEY
+
+  SYNOPSIS
+    get_part_id_key()
+    field_array         Array of fields for PARTITION KEY
+    no_parts            Number of KEY partitions
+    out:func_value      Value of hash function
+
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_key(Field **field_array,
+                              uint no_parts,
+                              longlong *func_value)
+{
+  DBUG_ENTER("get_part_id_key");
+  *func_value= calculate_key_value(field_array);
+  DBUG_RETURN(*func_value % no_parts);
+}
+
+
+/*
+  Calculate part_id for (SUB)PARTITION BY LINEAR KEY
+
+  SYNOPSIS
+    get_part_id_linear_key()
+    part_info           A reference to the partition_info struct where all
+                        the desired information is given
+    field_array         Array of fields for PARTITION KEY
+    no_parts            Number of KEY partitions
+    out:func_value      Value of hash function
+
+  RETURN VALUE
+    Calculated partition id
+*/
+
+inline
+static uint32 get_part_id_linear_key(partition_info *part_info,
+                                     Field **field_array,
+                                     uint no_parts,
+                                     longlong *func_value)
+{
+  DBUG_ENTER("get_part_id_linear_key");
+
+  *func_value= calculate_key_value(field_array);
+  DBUG_RETURN(get_part_id_from_linear_hash(*func_value,
+                                           part_info->linear_hash_mask,
+                                           no_parts));
+}
+
+/*
+  This function is used to calculate the partition id where all partition
+  fields have been prepared to point to a record where the partition field
+  values are bound.
+
+  SYNOPSIS
+    get_partition_id()
+    part_info           A reference to the partition_info struct where all
+                        the desired information is given
+    out:part_id         The partition id is returned through this pointer
+
+  RETURN VALUE
+    part_id
+    return TRUE means that the fields of the partition function didn't fit
+    into any partition and thus the values of the PF-fields are not allowed.
+
+  DESCRIPTION
+    A routine used from write_row, update_row and delete_row from any
+    handler supporting partitioning. It is also a support routine for
+    get_partition_set used to find the set of partitions needed to scan
+    for a certain index scan or full table scan.
+
+    It is actually 14 different variants of this function which are called
+    through a function pointer.
+
+    get_partition_id_list
+    get_partition_id_range
+    get_partition_id_hash_nosub
+    get_partition_id_key_nosub
+    get_partition_id_linear_hash_nosub
+    get_partition_id_linear_key_nosub
+    get_partition_id_range_sub_hash
+    get_partition_id_range_sub_key
+    get_partition_id_range_sub_linear_hash
+    get_partition_id_range_sub_linear_key
+    get_partition_id_list_sub_hash
+    get_partition_id_list_sub_key
+    get_partition_id_list_sub_linear_hash
+    get_partition_id_list_sub_linear_key
+*/
+
+/*
+  This function is used to calculate the main partition to use in the case
+  of subpartitioning when we don't know enough to get the partition
+  identity in total.
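+  This happens e.g. during an index scan where the key binds the fields
+  of the RANGE or LIST function of a subpartitioned table but not the
+  fields of the subpartitioning function: the top-level partition can be
+  calculated while the subpartition remains unknown.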
+
+  SYNOPSIS
+    get_part_partition_id()
+    part_info           A reference to the partition_info struct where all
+                        the desired information is given
+    out:part_id         The partition id is returned through this pointer
+
+  RETURN VALUE
+    part_id
+    return TRUE means that the fields of the partition function didn't fit
+    into any partition and thus the values of the PF-fields are not allowed.
+
+  DESCRIPTION
+
+    It is actually 6 different variants of this function which are called
+    through a function pointer.
+
+    get_partition_id_list
+    get_partition_id_range
+    get_partition_id_hash_nosub
+    get_partition_id_key_nosub
+    get_partition_id_linear_hash_nosub
+    get_partition_id_linear_key_nosub
+*/
+
+
+int get_partition_id_list(partition_info *part_info,
+                          uint32 *part_id,
+                          longlong *func_value)
+{
+  LIST_PART_ENTRY *list_array= part_info->list_array;
+  int list_index;
+  longlong list_value;
+  int min_list_index= 0;
+  int max_list_index= part_info->no_list_values - 1;
+  longlong part_func_value= part_val_int(part_info->part_expr);
+  DBUG_ENTER("get_partition_id_list");
+
+  *func_value= part_func_value;
+  while (max_list_index >= min_list_index)
+  {
+    list_index= (max_list_index + min_list_index) >> 1;
+    list_value= list_array[list_index].list_value;
+    if (list_value < part_func_value)
+      min_list_index= list_index + 1;
+    else if (list_value > part_func_value)
+    {
+      if (!list_index)
+        goto notfound;
+      max_list_index= list_index - 1;
+    }
+    else
+    {
+      *part_id= (uint32)list_array[list_index].partition_id;
+      DBUG_RETURN(0);
+    }
+  }
+notfound:
+  *part_id= 0;
+  DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
+}
+
+
+/*
+  Find the sub-array of part_info->list_array that corresponds to given
+  interval
+
+  SYNOPSIS
+    get_list_array_idx_for_endpoint()
+    part_info           Partitioning info (partitioning type must be LIST)
+    left_endpoint       TRUE  - the interval is [a; +inf) or (a; +inf)
+                        FALSE - the interval is (-inf; a] or (-inf; a)
+    include_endpoint    TRUE iff the interval includes the endpoint
+
+  DESCRIPTION
+    This function finds the sub-array of part_info->list_array where values
+    of list_array[idx].list_value are contained within the specified
+    interval. list_array is ordered by list_value, so
+    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE),
+       the sought sub-array starts at some index idx and continues till
+       array end.
+       The function returns the first index idx, such that
+       list_array[idx].list_value is contained within the passed interval.
+
+    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE),
+       the sought sub-array starts at array start and continues till some
+       last index idx.
+       The function returns the first index idx, such that
+       list_array[idx].list_value is NOT contained within the passed
+       interval. If all array elements are contained,
+       part_info->no_list_values is returned.
+
+  NOTE
+    The caller will call this function and then will run along the
+    sub-array of list_array to collect partition ids. If the number of
+    list values is significantly higher than the number of partitions,
+    this could be slow and we could invent some other approach. The
+    "run over list array" part is already wrapped in a get_next()-like
+    function.
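+
+    As a worked example, assume list_array holds the values (2,4,6). For
+    the interval [4; +inf) (left_endpoint == TRUE, include_endpoint ==
+    TRUE) the function returns 1, selecting the sub-array (4,6), whereas
+    for (4; +inf) (include_endpoint == FALSE) it returns 2, selecting
+    only (6).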
+
+  RETURN
+    The edge of corresponding sub-array of part_info->list_array
+*/
+
+uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
+                                       bool left_endpoint,
+                                       bool include_endpoint)
+{
+  DBUG_ENTER("get_list_array_idx_for_endpoint");
+  LIST_PART_ENTRY *list_array= part_info->list_array;
+  uint list_index;
+  longlong list_value;
+  uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
+  /* Get the partitioning function value for the endpoint */
+  longlong part_func_value= part_val_int(part_info->part_expr);
+  while (max_list_index >= min_list_index)
+  {
+    list_index= (max_list_index + min_list_index) >> 1;
+    list_value= list_array[list_index].list_value;
+    if (list_value < part_func_value)
+      min_list_index= list_index + 1;
+    else if (list_value > part_func_value)
+    {
+      if (!list_index)
+        goto notfound;
+      max_list_index= list_index - 1;
+    }
+    else
+    {
+      DBUG_RETURN(list_index + test(left_endpoint ^ include_endpoint));
+    }
+  }
+notfound:
+  if (list_value < part_func_value)
+    list_index++;
+  DBUG_RETURN(list_index);
+}
+
+
+int get_partition_id_range(partition_info *part_info,
+                           uint32 *part_id,
+                           longlong *func_value)
+{
+  longlong *range_array= part_info->range_int_array;
+  uint max_partition= part_info->no_parts - 1;
+  uint min_part_id= 0;
+  uint max_part_id= max_partition;
+  uint loc_part_id;
+  longlong part_func_value= part_val_int(part_info->part_expr);
+  DBUG_ENTER("get_partition_id_range");
+
+  while (max_part_id > min_part_id)
+  {
+    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
+    if (range_array[loc_part_id] <= part_func_value)
+      min_part_id= loc_part_id + 1;
+    else
+      max_part_id= loc_part_id - 1;
+  }
+  loc_part_id= max_part_id;
+  if (part_func_value >= range_array[loc_part_id])
+    if (loc_part_id != max_partition)
+      loc_part_id++;
+  *part_id= (uint32)loc_part_id;
+  *func_value= part_func_value;
+  if (loc_part_id == max_partition)
+    if (range_array[loc_part_id] != LONGLONG_MAX)
+      if (part_func_value >= range_array[loc_part_id])
+        DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Find the sub-array of part_info->range_int_array that covers given
+  interval
+
+  SYNOPSIS
+    get_partition_id_range_for_endpoint()
+    part_info           Partitioning info (partitioning type must be RANGE)
+    left_endpoint       TRUE  - the interval is [a; +inf) or (a; +inf)
+                        FALSE - the interval is (-inf; a] or (-inf; a).
+    include_endpoint    TRUE <=> the endpoint itself is included in the
+                        interval
+
+  DESCRIPTION
+    This function finds the sub-array of part_info->range_int_array where
+    the elements have non-empty intersections with the given interval.
+
+    A range_int_array element at index idx represents the interval
+
+      [range_int_array[idx-1], range_int_array[idx]),
+
+    intervals are disjoint and ordered by their right bound, so
+
+    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE),
+       the sought sub-array starts at some index idx and continues till
+       array end.
+       The function returns the first index idx, such that the interval
+       represented by range_int_array[idx] has non empty intersection with
+       the passed interval.
+
+    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE),
+       the sought sub-array starts at array start and continues till some
+       last index idx.
+       The function returns the first index idx, such that the interval
+       represented by range_int_array[idx] has EMPTY intersection with the
+       passed interval.
+ If the interval represented by the last array element has non-empty + intersection with the passed interval, part_info->no_parts is + returned. + + RETURN + The edge of corresponding part_info->range_int_array sub-array. +*/ + +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint) +{ + DBUG_ENTER("get_partition_id_range_for_endpoint"); + longlong *range_array= part_info->range_int_array; + uint max_partition= part_info->no_parts - 1; + uint min_part_id= 0, max_part_id= max_partition, loc_part_id; + /* Get the partitioning function value for the endpoint */ + longlong part_func_value= part_val_int(part_info->part_expr); + while (max_part_id > min_part_id) + { + loc_part_id= (max_part_id + min_part_id + 1) >> 1; + if (range_array[loc_part_id] <= part_func_value) + min_part_id= loc_part_id + 1; + else + max_part_id= loc_part_id - 1; + } + loc_part_id= max_part_id; + if (loc_part_id < max_partition && + part_func_value >= range_array[loc_part_id+1]) + { + loc_part_id++; + } + if (left_endpoint) + { + if (part_func_value >= range_array[loc_part_id]) + loc_part_id++; + } + else + { + if (part_func_value == range_array[loc_part_id]) + loc_part_id += test(include_endpoint); + else if (part_func_value > range_array[loc_part_id]) + loc_part_id++; + loc_part_id++; + } + DBUG_RETURN(loc_part_id); +} + + +int get_partition_id_hash_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr, + func_value); + return 0; +} + + +int get_partition_id_linear_hash_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + *part_id= get_part_id_linear_hash(part_info, part_info->no_parts, + part_info->part_expr, func_value); + return 0; +} + + +int get_partition_id_key_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + *part_id= get_part_id_key(part_info->part_field_array, + part_info->no_parts, func_value); + return 0; +} + + +int get_partition_id_linear_key_nosub(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + *part_id= get_part_id_linear_key(part_info, + part_info->part_field_array, + part_info->no_parts, func_value); + return 0; +} + + +int get_partition_id_range_sub_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + uint32 loc_part_id, sub_part_id; + uint no_subparts; + longlong local_func_value; + int error; + DBUG_ENTER("get_partition_id_range_sub_hash"); + + if (unlikely((error= get_partition_id_range(part_info, &loc_part_id, + func_value)))) + { + DBUG_RETURN(error); + } + no_subparts= part_info->no_subparts; + sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr, + &local_func_value); + *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts); + DBUG_RETURN(0); +} + + +int get_partition_id_range_sub_linear_hash(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + uint32 loc_part_id, sub_part_id; + uint no_subparts; + longlong local_func_value; + int error; + DBUG_ENTER("get_partition_id_range_sub_linear_hash"); + + if (unlikely((error= get_partition_id_range(part_info, &loc_part_id, + func_value)))) + { + DBUG_RETURN(error); + } + no_subparts= part_info->no_subparts; + sub_part_id= get_part_id_linear_hash(part_info, no_subparts, + part_info->subpart_expr, + &local_func_value); + *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts); + DBUG_RETURN(0); +} + + +int 
get_partition_id_range_sub_key(partition_info *part_info,
+                                   uint32 *part_id,
+                                   longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_range_sub_key");
+
+  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
+                                              func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array,
+                               no_subparts, &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+int get_partition_id_range_sub_linear_key(partition_info *part_info,
+                                          uint32 *part_id,
+                                          longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_range_sub_linear_key");
+
+  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
+                                              func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts, &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+int get_partition_id_list_sub_hash(partition_info *part_info,
+                                   uint32 *part_id,
+                                   longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_list_sub_hash");
+
+  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
+                                             func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr,
+                                &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+int get_partition_id_list_sub_linear_hash(partition_info *part_info,
+                                          uint32 *part_id,
+                                          longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_list_sub_linear_hash");
+
+  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
+                                             func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
+                                       part_info->subpart_expr,
+                                       &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+int get_partition_id_list_sub_key(partition_info *part_info,
+                                  uint32 *part_id,
+                                  longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_list_sub_key");
+
+  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
+                                             func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_key(part_info->subpart_field_array,
+                               no_subparts, &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+int get_partition_id_list_sub_linear_key(partition_info *part_info,
+                                         uint32 *part_id,
+                                         longlong *func_value)
+{
+  uint32 loc_part_id, sub_part_id;
+  uint no_subparts;
+  longlong local_func_value;
+  int error;
+  DBUG_ENTER("get_partition_id_list_sub_linear_key");
+
+  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
+                                             func_value))))
+  {
+    DBUG_RETURN(error);
+  }
+  no_subparts= part_info->no_subparts;
+  sub_part_id= get_part_id_linear_key(part_info,
+                                      part_info->subpart_field_array,
+                                      no_subparts, &local_func_value);
+  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
+  DBUG_RETURN(0);
+}
+
+
+/*
+  This function is used to calculate the subpartition id
+
+  SYNOPSIS
+    get_subpartition_id()
+    part_info           A reference to the partition_info struct where all
+                        the desired information is given
+
+  RETURN VALUE
+    part_id             The subpartition identity
+
+  DESCRIPTION
+    A routine used in some SELECTs when only partial knowledge of the
+    partitions is available.
+
+    It is actually 4 different variants of this function which are called
+    through a function pointer.
+
+    get_partition_id_hash_sub
+    get_partition_id_key_sub
+    get_partition_id_linear_hash_sub
+    get_partition_id_linear_key_sub
+*/
+
+uint32 get_partition_id_hash_sub(partition_info *part_info)
+{
+  longlong func_value;
+  return get_part_id_hash(part_info->no_subparts, part_info->subpart_expr,
+                          &func_value);
+}
+
+
+uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
+{
+  longlong func_value;
+  return get_part_id_linear_hash(part_info, part_info->no_subparts,
+                                 part_info->subpart_expr, &func_value);
+}
+
+
+uint32 get_partition_id_key_sub(partition_info *part_info)
+{
+  longlong func_value;
+  return get_part_id_key(part_info->subpart_field_array,
+                         part_info->no_subparts, &func_value);
+}
+
+
+uint32 get_partition_id_linear_key_sub(partition_info *part_info)
+{
+  longlong func_value;
+  return get_part_id_linear_key(part_info,
+                                part_info->subpart_field_array,
+                                part_info->no_subparts, &func_value);
+}
+
+
+/*
+  Set an indicator on all partition fields that are set by the key
+
+  SYNOPSIS
+    set_PF_fields_in_key()
+    key_info            Information about the index
+    key_length          Length of key
+
+  RETURN VALUE
+    TRUE                Found partition field set by key
+    FALSE               No partition field set by key
+*/
+
+static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
+{
+  KEY_PART_INFO *key_part;
+  bool found_part_field= FALSE;
+  DBUG_ENTER("set_PF_fields_in_key");
+
+  for (key_part= key_info->key_part; (int)key_length > 0; key_part++)
+  {
+    if (key_part->null_bit)
+      key_length--;
+    if (key_part->type == HA_KEYTYPE_BIT)
+    {
+      if (((Field_bit*)key_part->field)->bit_len)
+        key_length--;
+    }
+    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
+    {
+      key_length-= HA_KEY_BLOB_LENGTH;
+    }
+    if (key_length < key_part->length)
+      break;
+    key_length-= key_part->length;
+    if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
+    {
+      found_part_field= TRUE;
+      key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
+    }
+  }
+  DBUG_RETURN(found_part_field);
+}
+
+
+/*
+  We have found that at least one partition field was set by a key, now
+  check if a partition function has all its fields bound or not.
+
+  SYNOPSIS
+    check_part_func_bound()
+    ptr                 Array of fields NULL terminated (partition fields)
+
+  RETURN VALUE
+    TRUE                All fields in partition function are set
+    FALSE               Not all fields in partition function are set
+*/
+
+static bool check_part_func_bound(Field **ptr)
+{
+  bool result= TRUE;
+  DBUG_ENTER("check_part_func_bound");
+
+  for (; *ptr; ptr++)
+  {
+    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
+    {
+      result= FALSE;
+      break;
+    }
+  }
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Get the id of the subpartitioning part by using the key buffer of the
+  index scan.
+ + SYNOPSIS + get_sub_part_id_from_key() + table The table object + buf A buffer that can be used to evaluate the partition function + key_info The index object + key_spec A key_range containing key and key length + + RETURN VALUES + part_id Subpartition id to use + + DESCRIPTION + Use key buffer to set-up record in buf, move field pointers and + get the partition identity and restore field pointers afterwards. +*/ + +static uint32 get_sub_part_id_from_key(const TABLE *table,byte *buf, + KEY *key_info, + const key_range *key_spec) +{ + byte *rec0= table->record[0]; + partition_info *part_info= table->part_info; + uint32 part_id; + DBUG_ENTER("get_sub_part_id_from_key"); + + key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length); + if (likely(rec0 == buf)) + part_id= part_info->get_subpartition_id(part_info); + else + { + Field **part_field_array= part_info->subpart_field_array; + set_field_ptr(part_field_array, buf, rec0); + part_id= part_info->get_subpartition_id(part_info); + set_field_ptr(part_field_array, rec0, buf); + } + DBUG_RETURN(part_id); +} + +/* + Get the id of the partitioning part by using the key buffer of the + index scan. + + SYNOPSIS + get_part_id_from_key() + table The table object + buf A buffer that can be used to evaluate the partition function + key_info The index object + key_spec A key_range containing key and key length + out:part_id Partition to use + + RETURN VALUES + TRUE Partition to use not found + FALSE Ok, part_id indicates partition to use + + DESCRIPTION + Use key buffer to set-up record in buf, move field pointers and + get the partition identity and restore field pointers afterwards. +*/ + +bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info, + const key_range *key_spec, uint32 *part_id) +{ + bool result; + byte *rec0= table->record[0]; + partition_info *part_info= table->part_info; + longlong func_value; + DBUG_ENTER("get_part_id_from_key"); + + key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length); + if (likely(rec0 == buf)) + result= part_info->get_part_partition_id(part_info, part_id, + &func_value); + else + { + Field **part_field_array= part_info->part_field_array; + set_field_ptr(part_field_array, buf, rec0); + result= part_info->get_part_partition_id(part_info, part_id, + &func_value); + set_field_ptr(part_field_array, rec0, buf); + } + DBUG_RETURN(result); +} + +/* + Get the partitioning id of the full PF by using the key buffer of the + index scan. + + SYNOPSIS + get_full_part_id_from_key() + table The table object + buf A buffer that is used to evaluate the partition function + key_info The index object + key_spec A key_range containing key and key length + out:part_spec A partition id containing start part and end part + + RETURN VALUES + part_spec + No partitions to scan is indicated by end_part > start_part when returning + + DESCRIPTION + Use key buffer to set-up record in buf, move field pointers if needed and + get the partition identity and restore field pointers afterwards. 
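+
+    A caller checks the out parameter along these lines (sketch; the
+    part_id_range type is the one used throughout this file):
+
+      part_id_range part_spec;
+      get_full_part_id_from_key(table, buf, key_info, key_spec, &part_spec);
+      if (part_spec.start_part > part_spec.end_part)
+        ;                        // impossible key value, nothing to scan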
+*/
+
+void get_full_part_id_from_key(const TABLE *table, byte *buf,
+                               KEY *key_info,
+                               const key_range *key_spec,
+                               part_id_range *part_spec)
+{
+  bool result;
+  partition_info *part_info= table->part_info;
+  byte *rec0= table->record[0];
+  longlong func_value;
+  DBUG_ENTER("get_full_part_id_from_key");
+
+  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
+  if (likely(rec0 == buf))
+    result= part_info->get_partition_id(part_info, &part_spec->start_part,
+                                        &func_value);
+  else
+  {
+    Field **part_field_array= part_info->full_part_field_array;
+    set_field_ptr(part_field_array, buf, rec0);
+    result= part_info->get_partition_id(part_info, &part_spec->start_part,
+                                        &func_value);
+    set_field_ptr(part_field_array, rec0, buf);
+  }
+  part_spec->end_part= part_spec->start_part;
+  if (unlikely(result))
+    part_spec->start_part++;
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Prune the set of partitions to use in query
+
+  SYNOPSIS
+    prune_partition_set()
+    table               The table object
+    out:part_spec       Contains start part, end part
+
+  DESCRIPTION
+    This function is called to prune the range of partitions to scan by
+    checking the used_partitions bitmap.
+    If start_part > end_part at return it means no partition needs to be
+    scanned. If start_part == end_part it always means a single partition
+    needs to be scanned.
+
+  RETURN VALUE
+    part_spec
+*/
+void prune_partition_set(const TABLE *table, part_id_range *part_spec)
+{
+  int last_partition= -1;
+  uint i;
+  partition_info *part_info= table->part_info;
+
+  DBUG_ENTER("prune_partition_set");
+  for (i= part_spec->start_part; i <= part_spec->end_part; i++)
+  {
+    if (bitmap_is_set(&(part_info->used_partitions), i))
+    {
+      DBUG_PRINT("info", ("Partition %d is set", i));
+      if (last_partition == -1)
+        /* First partition found in set and pruned bitmap */
+        part_spec->start_part= i;
+      last_partition= i;
+    }
+  }
+  if (last_partition == -1)
+    /* No partition found in pruned bitmap */
+    part_spec->start_part= part_spec->end_part + 1;
+  else
+    part_spec->end_part= last_partition;
+
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Get the set of partitions to use in query.
+
+  SYNOPSIS
+    get_partition_set()
+    table               The table object
+    buf                 A buffer that can be used to evaluate the
+                        partition function
+    index               The index of the key used, if MAX_KEY no index used
+    key_spec            A key_range containing key and key length
+    out:part_spec       Contains start part, end part and indicator if
+                        bitmap is used for which partitions to scan
+
+  DESCRIPTION
+    This function is called to discover which partitions to use in an
+    index scan or a full table scan.
+    It returns a range of partitions to scan. If there are holes in this
+    range with partitions that need not be scanned, a bit array is used
+    to signal which partitions to use and which not to use.
+    If start_part > end_part at return it means no partition needs to be
+    scanned. If start_part == end_part it always means a single partition
+    needs to be scanned.
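+    E.g. for a table with 4 partitions, an exact key lookup that binds all
+    fields of the partition function collapses the range to a single
+    partition, whereas a scan without a usable key keeps the full range
+    0-3, which is then pruned further against the used_partitions bitmap.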
+ + RETURN VALUE + part_spec +*/ +void get_partition_set(const TABLE *table, byte *buf, const uint index, + const key_range *key_spec, part_id_range *part_spec) +{ + partition_info *part_info= table->part_info; + uint no_parts= part_info->get_tot_partitions(); + uint i, part_id; + uint sub_part= no_parts; + uint32 part_part= no_parts; + KEY *key_info= NULL; + bool found_part_field= FALSE; + DBUG_ENTER("get_partition_set"); + + part_spec->start_part= 0; + part_spec->end_part= no_parts - 1; + if ((index < MAX_KEY) && + key_spec->flag == (uint)HA_READ_KEY_EXACT && + part_info->some_fields_in_PF.is_set(index)) + { + key_info= table->key_info+index; + /* + The index can potentially provide at least one PF-field (field in the + partition function). Thus it is interesting to continue our probe. + */ + if (key_spec->length == key_info->key_length) + { + /* + The entire key is set so we can check whether we can immediately + derive either the complete PF or if we can derive either + the top PF or the subpartitioning PF. This can be established by + checking precalculated bits on each index. + */ + if (part_info->all_fields_in_PF.is_set(index)) + { + /* + We can derive the exact partition to use, no more than this one + is needed. + */ + get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec); + /* + Check if range can be adjusted by looking in used_partitions + */ + prune_partition_set(table, part_spec); + DBUG_VOID_RETURN; + } + else if (part_info->is_sub_partitioned()) + { + if (part_info->all_fields_in_SPF.is_set(index)) + sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec); + else if (part_info->all_fields_in_PPF.is_set(index)) + { + if (get_part_id_from_key(table,buf,key_info, + key_spec,(uint32*)&part_part)) + { + /* + The value of the RANGE or LIST partitioning was outside of + allowed values. Thus it is certain that the result of this + scan will be empty. + */ + part_spec->start_part= no_parts; + DBUG_VOID_RETURN; + } + } + } + } + else + { + /* + Set an indicator on all partition fields that are bound. + If at least one PF-field was bound it pays off to check whether + the PF or PPF or SPF has been bound. + (PF = Partition Function, SPF = Subpartition Function and + PPF = Partition Function part of subpartitioning) + */ + if ((found_part_field= set_PF_fields_in_key(key_info, + key_spec->length))) + { + if (check_part_func_bound(part_info->full_part_field_array)) + { + /* + We were able to bind all fields in the partition function even + by using only a part of the key. Calculate the partition to use. + */ + get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec); + clear_indicator_in_key_fields(key_info); + /* + Check if range can be adjusted by looking in used_partitions + */ + prune_partition_set(table, part_spec); + DBUG_VOID_RETURN; + } + else if (part_info->is_sub_partitioned()) + { + if (check_part_func_bound(part_info->subpart_field_array)) + sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec); + else if (check_part_func_bound(part_info->part_field_array)) + { + if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part)) + { + part_spec->start_part= no_parts; + clear_indicator_in_key_fields(key_info); + DBUG_VOID_RETURN; + } + } + } + } + } + } + { + /* + The next step is to analyse the table condition to see whether any + information about which partitions to scan can be derived from there. + Currently not implemented. 
+    */
+  }
+  /*
+    If we come here we have found a range of sorts: we have either
+    discovered nothing or a range of partitions with possible holes in
+    it. We need a bitvector to further the work here.
+  */
+  if (!(part_part == no_parts && sub_part == no_parts))
+  {
+    /*
+      We can only arrive here if we are using subpartitioning.
+    */
+    if (part_part != no_parts)
+    {
+      /*
+        We know the top partition and need to scan all underlying
+        subpartitions. This is a range without holes.
+      */
+      DBUG_ASSERT(sub_part == no_parts);
+      part_spec->start_part= part_part * part_info->no_subparts;
+      part_spec->end_part= part_spec->start_part+part_info->no_subparts - 1;
+    }
+    else
+    {
+      DBUG_ASSERT(sub_part != no_parts);
+      part_spec->start_part= sub_part;
+      part_spec->end_part=sub_part+
+                          (part_info->no_subparts*(part_info->no_parts-1));
+      for (i= 0, part_id= sub_part; i < part_info->no_parts;
+           i++, part_id+= part_info->no_subparts)
+        ; //Set bit part_id in bit array
+    }
+  }
+  if (found_part_field)
+    clear_indicator_in_key_fields(key_info);
+  /*
+    Check if range can be adjusted by looking in used_partitions
+  */
+  prune_partition_set(table, part_spec);
+  DBUG_VOID_RETURN;
+}
+
+/*
+  If the table is partitioned we will read the partition info from the
+  .frm file here.
+  -------------------------------
+  |  Fileinfo     64 bytes      |
+  -------------------------------
+  | Formnames     7 bytes       |
+  -------------------------------
+  | Not used    4021 bytes      |
+  -------------------------------
+  | Keyinfo + record            |
+  -------------------------------
+  | Padded to next multiple     |
+  | of IO_SIZE                  |
+  -------------------------------
+  | Forminfo     288 bytes      |
+  -------------------------------
+  | Screen buffer, to make      |
+  | field names readable        |
+  -------------------------------
+  | Packed field info           |
+  | 17 + 1 + strlen(field_name) |
+  | + 1 end of file character   |
+  -------------------------------
+  | Partition info              |
+  -------------------------------
+  We store the length of the partition info in Fileinfo[55-58].
+
+  Read the partition syntax from the frm file and parse it to get the
+  data structures of the partitioning.
+
+  SYNOPSIS
+    mysql_unpack_partition()
+    thd                  Thread object
+    part_buf             Partition info from frm file
+    part_info_len        Length of partition syntax
+    part_state           Partition state info from frm file
+    part_state_len       Length of partition state info
+    table                Table object of partitioned table
+    is_create_table_ind  Is it called from CREATE TABLE
+    default_db_type      What is the default engine of the table
+
+  RETURN VALUE
+    TRUE                 Error
+    FALSE                Success
+
+  DESCRIPTION
+    Read the partition syntax from the current position in the frm file.
+    Initiate a LEX object, save the list of item tree objects to free
+    after the query is done. Set-up the partition info object such that
+    the parser knows it is called internally. Call the parser to create
+    the data structures (the best possible recreation of item trees and
+    so forth since there is no serialisation of these objects other than
+    in parseable text format).
+    We need to save the text of the partition functions since it is not
+    possible to retrace this given an item tree.
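+    The part_buf handed to the parser is the same text that
+    generate_partition_syntax() produced when the frm file was written,
+    e.g. something like " PARTITION BY HASH (a) PARTITIONS 4" (note the
+    leading space added by generate_partition_syntax()).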
+*/
+
+bool mysql_unpack_partition(THD *thd, const uchar *part_buf,
+                            uint part_info_len,
+                            uchar *part_state, uint part_state_len,
+                            TABLE* table, bool is_create_table_ind,
+                            handlerton *default_db_type)
+{
+  Item *thd_free_list= thd->free_list;
+  bool result= TRUE;
+  partition_info *part_info;
+  CHARSET_INFO *old_character_set_client= thd->variables.character_set_client;
+  LEX *old_lex= thd->lex;
+  LEX lex;
+  DBUG_ENTER("mysql_unpack_partition");
+
+  thd->lex= &lex;
+  thd->variables.character_set_client= system_charset_info;
+  lex_start(thd, part_buf, part_info_len);
+  /*
+    We need to use the current SELECT_LEX since we need to keep the
+    Name_resolution_context object which is referenced from the
+    Item_field objects.
+    This is not a nice solution since if the parser uses current_select
+    for anything else it will corrupt the current LEX object.
+  */
+  thd->lex->current_select= old_lex->current_select;
+  /*
+    All Items created are put into a free list on the THD object. This
+    list is used to free all Item objects after completing a query. We
+    don't want that to happen with the Item tree created as part of the
+    partition info. This should be attached to the table object and
+    remain so until the table object is released.
+    Thus we move away the current list temporarily and start a new list
+    that we then save in the partition info structure.
+  */
+  thd->free_list= NULL;
+  lex.part_info= new partition_info();/* Indicates yyparse from this place */
+  if (!lex.part_info)
+  {
+    mem_alloc_error(sizeof(partition_info));
+    goto end;
+  }
+  lex.part_info->part_state= part_state;
+  lex.part_info->part_state_len= part_state_len;
+  DBUG_PRINT("info", ("Parse: %s", part_buf));
+  if (yyparse((void*)thd) || thd->is_fatal_error)
+  {
+    free_items(thd->free_list);
+    goto end;
+  }
+  /*
+    The parsed syntax residing in the frm file can still contain defaults.
+    The reason is that the frm file is sometimes saved outside of this
+    MySQL Server and used in backup and restore of clusters or partitioned
+    tables. It is not certain that the restore will restore exactly the
+    same default partitioning.
+
+    The easiest manner of handling this is to simply continue using the
+    part_info we already built up during mysql_create_table if we are
+    in the process of creating a table. If the table already exists we
+    need to discover the number of partitions for the default parts.
+    Since the handler object hasn't been created here yet we need to
+    postpone this to the fix_partition_func method.
+  */
+
+  DBUG_PRINT("info", ("Successful parse"));
+  part_info= lex.part_info;
+  DBUG_PRINT("info", ("default engine = %d, default_db_type = %d",
+             ha_legacy_type(part_info->default_engine_type),
+             ha_legacy_type(default_db_type)));
+  if (is_create_table_ind)
+  {
+    if (old_lex->name)
+    {
+      /*
+        This code is executed when we do a CREATE TABLE t1 LIKE t2.
+        old_lex->name contains the t2 and the table we are opening has
+        name t1.
+      */
+      Table_ident *table_ident= (Table_ident *)old_lex->name;
+      char *src_db= table_ident->db.str ? table_ident->db.str : thd->db;
+      char *src_table= table_ident->table.str;
+      char buf[FN_REFLEN];
+      build_table_filename(buf, sizeof(buf), src_db, src_table, "");
+      if (partition_default_handling(table, part_info, buf))
+      {
+        result= TRUE;
+        goto end;
+      }
+    }
+    else
+      part_info= old_lex->part_info;
+  }
+  table->part_info= part_info;
+  table->file->set_part_info(part_info);
+  if (part_info->default_engine_type == NULL)
+  {
+    part_info->default_engine_type= default_db_type;
+  }
+  else
+  {
+    DBUG_ASSERT(part_info->default_engine_type == default_db_type);
+  }
+  part_info->item_free_list= thd->free_list;
+
+  {
+    /*
+      This code part allocates memory for the serialised item information
+      for the partition functions. In most cases this is not needed, but
+      if the table is used for SHOW CREATE TABLE or an ALTER TABLE that
+      modifies partition information it is needed, and the info is lost
+      if we don't save it here. So unfortunately we have to do it here
+      even if in most cases it is not needed. This is a consequence of
+      item trees not being serialisable.
+    */
+    uint part_func_len= part_info->part_func_len;
+    uint subpart_func_len= part_info->subpart_func_len;
+    char *part_func_string= NULL;
+    char *subpart_func_string= NULL;
+    if ((part_func_len &&
+         !((part_func_string= thd->alloc(part_func_len)))) ||
+        (subpart_func_len &&
+         !((subpart_func_string= thd->alloc(subpart_func_len)))))
+    {
+      mem_alloc_error(part_func_len);
+      free_items(thd->free_list);
+      part_info->item_free_list= 0;
+      goto end;
+    }
+    if (part_func_len)
+      memcpy(part_func_string, part_info->part_func_string, part_func_len);
+    if (subpart_func_len)
+      memcpy(subpart_func_string, part_info->subpart_func_string,
+             subpart_func_len);
+    part_info->part_func_string= part_func_string;
+    part_info->subpart_func_string= subpart_func_string;
+  }
+
+  result= FALSE;
+end:
+  thd->free_list= thd_free_list;
+  thd->lex= old_lex;
+  thd->variables.character_set_client= old_character_set_client;
+  DBUG_RETURN(result);
+}
+
+
+/*
+  SYNOPSIS
+    fast_alter_partition_error_handler()
+    lpt                 Container for parameters
+
+  RETURN VALUES
+    None
+
+  DESCRIPTION
+    Support routine to clean up after failures of on-line ALTER TABLE
+    for partition management.
+*/
+
+static void fast_alter_partition_error_handler(ALTER_PARTITION_PARAM_TYPE *lpt)
+{
+  DBUG_ENTER("fast_alter_partition_error_handler");
+  /* TODO: WL 2826 Error handling */
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  SYNOPSIS
+    fast_end_partition()
+    thd                 Thread object
+    copied              Number of records copied
+    deleted             Number of records deleted
+    table_list          Table list with the one table in it
+    is_empty            Has nothing been done
+    lpt                 Struct to be used by error handler
+    written_bin_log     Was the statement already written to the binary log
+
+  RETURN VALUES
+    FALSE               Success
+    TRUE                Failure
+
+  DESCRIPTION
+    Support routine to handle the successful cases for partition
+    management.
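+    On success the statement is written to the binary log when needed and
+    an ok packet is sent to the client with an INSERT-like info string
+    built from the copied and deleted counts, e.g.
+    "Records: 3  Duplicates: 0  Warnings: 0".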
+*/
+
+static int fast_end_partition(THD *thd, ulonglong copied,
+                              ulonglong deleted,
+                              TABLE_LIST *table_list, bool is_empty,
+                              ALTER_PARTITION_PARAM_TYPE *lpt,
+                              bool written_bin_log)
+{
+  int error;
+  DBUG_ENTER("fast_end_partition");
+
+  thd->proc_info="end";
+  if (!is_empty)
+    query_cache_invalidate3(thd, table_list, 0);
+  error= ha_commit_stmt(thd);
+  if (ha_commit(thd))
+    error= 1;
+  if (!error || is_empty)
+  {
+    char tmp_name[80];
+    if ((!is_empty) && (!written_bin_log) &&
+        (!thd->lex->no_write_to_binlog))
+      write_bin_log(thd, FALSE, thd->query, thd->query_length);
+    close_thread_tables(thd);
+    my_snprintf(tmp_name, sizeof(tmp_name), ER(ER_INSERT_INFO),
+                (ulong) (copied + deleted),
+                (ulong) deleted,
+                (ulong) 0);
+    send_ok(thd,copied+deleted,0L,tmp_name);
+    DBUG_RETURN(FALSE);
+  }
+  fast_alter_partition_error_handler(lpt);
+  DBUG_RETURN(TRUE);
+}
+
+
+/*
+  Check that the engine mix is correct
+
+  SYNOPSIS
+    check_engine_condition()
+    p_elem              Partition element
+    default_engine      Has the user specified an engine on table level
+    inout:engine_type   Current engine used
+    inout:first         Is it the first partition
+
+  RETURN VALUE
+    TRUE                Failed check
+    FALSE               Ok
+
+  DESCRIPTION
+    (specified partition handlers)  specified table handler
+    (NDB, NDB)          NDB         OK
+    (MYISAM, MYISAM)    -           OK
+    (MYISAM, -)         -           NOT OK
+    (MYISAM, -)         MYISAM      OK
+    (- , MYISAM)        -           NOT OK
+    (- , -)             MYISAM      OK
+    (-, -)              -           OK
+    (NDB, MYISAM)       *           NOT OK
+*/
+
+static bool check_engine_condition(partition_element *p_elem,
+                                   bool default_engine,
+                                   handlerton **engine_type,
+                                   bool *first)
+{
+  if (*first && default_engine)
+    *engine_type= p_elem->engine_type;
+  *first= FALSE;
+  if ((!default_engine &&
+       (p_elem->engine_type != *engine_type &&
+        !p_elem->engine_type)) ||
+      (default_engine &&
+       p_elem->engine_type != *engine_type))
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/*
+  We need to check if the engine used by all partitions can handle
+  partitioning natively.
+
+  SYNOPSIS
+    check_native_partitioned()
+    create_info         Create info in CREATE TABLE
+    out:ret_val         Return value
+    part_info           Partition info
+    thd                 Thread object
+
+  RETURN VALUES
+    Value returned in bool ret_value
+    TRUE                Native partitioning supported by engine
+    FALSE               Need to use the partition handler
+
+    Return value from function
+    TRUE                Error
+    FALSE               Success
+*/
+
+static bool check_native_partitioned(HA_CREATE_INFO *create_info,
+                                     bool *ret_val,
+                                     partition_info *part_info, THD *thd)
+{
+  List_iterator<partition_element> part_it(part_info->partitions);
+  bool first= TRUE;
+  bool default_engine;
+  handlerton *engine_type= create_info->db_type;
+  handlerton *old_engine_type= engine_type;
+  uint i= 0;
+  handler *file;
+  uint no_parts= part_info->partitions.elements;
+  DBUG_ENTER("check_native_partitioned");
+
+  default_engine= (create_info->used_fields & HA_CREATE_USED_ENGINE) ?
+                   TRUE : FALSE;
+ TRUE : FALSE; + DBUG_PRINT("info", ("engine_type = %u, default = %u", + ha_legacy_type(engine_type), + default_engine)); + if (no_parts) + { + do + { + partition_element *part_elem= part_it++; + if (part_info->is_sub_partitioned() && + part_elem->subpartitions.elements) + { + uint no_subparts= part_elem->subpartitions.elements; + uint j= 0; + List_iterator<partition_element> sub_it(part_elem->subpartitions); + do + { + partition_element *sub_elem= sub_it++; + if (check_engine_condition(sub_elem, default_engine, + &engine_type, &first)) + goto error; + } while (++j < no_subparts); + /* + In case of subpartitioning and defaults we allow that only + subparts have specified engines, as long as the parts haven't + specified the wrong engine it's ok. + */ + if (check_engine_condition(part_elem, FALSE, + &engine_type, &first)) + goto error; + } + else if (check_engine_condition(part_elem, default_engine, + &engine_type, &first)) + goto error; + } while (++i < no_parts); + } + + /* + All engines are of the same type. Check if this engine supports + native partitioning. + */ + + if (!engine_type) + engine_type= old_engine_type; + DBUG_PRINT("info", ("engine_type = %s", + ha_resolve_storage_engine_name(engine_type))); + if (engine_type->partition_flags && + (engine_type->partition_flags() & HA_CAN_PARTITION)) + { + create_info->db_type= engine_type; + DBUG_PRINT("info", ("Changed to native partitioning")); + *ret_val= TRUE; + } + DBUG_RETURN(FALSE); +error: + /* + Mixed engines not yet supported but when supported it will need + the partition handler + */ + *ret_val= FALSE; + DBUG_RETURN(TRUE); +} + + +/* + Prepare for ALTER TABLE of partition structure + + SYNOPSIS + prep_alter_part_table() + thd Thread object + table Table object + inout:alter_info Alter information + inout:create_info Create info for CREATE TABLE + old_db_type Old engine type + out:partition_changed Boolean indicating whether partition changed + out:fast_alter_partition Boolean indicating whether fast partition + change is requested + + RETURN VALUES + TRUE Error + FALSE Success + partition_changed + fast_alter_partition + + DESCRIPTION + This method handles all preparations for ALTER TABLE for partitioned + tables + We need to handle both partition management command such as Add Partition + and others here as well as an ALTER TABLE that completely changes the + partitioning and yet others that don't change anything at all. We start + by checking the partition management variants and then check the general + change patterns. +*/ + +uint prep_alter_part_table(THD *thd, TABLE *table, ALTER_INFO *alter_info, + HA_CREATE_INFO *create_info, + handlerton *old_db_type, + bool *partition_changed, + uint *fast_alter_partition) +{ + DBUG_ENTER("prep_alter_part_table"); + + if (alter_info->flags & + (ALTER_ADD_PARTITION | ALTER_DROP_PARTITION | + ALTER_COALESCE_PARTITION | ALTER_REORGANIZE_PARTITION | + ALTER_TABLE_REORG | ALTER_OPTIMIZE_PARTITION | + ALTER_CHECK_PARTITION | ALTER_ANALYZE_PARTITION | + ALTER_REPAIR_PARTITION | ALTER_REBUILD_PARTITION)) + { + partition_info *tab_part_info= table->part_info; + if (!tab_part_info) + { + my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); + DBUG_RETURN(TRUE); + } + /* + We are going to manipulate the partition info on the table object + so we need to ensure that the data structure of the table object + is freed by setting version to 0. table->s->version= 0 forces a + flush of the table object in close_thread_tables(). 
+ */ + uint flags= 0; + table->s->version= 0L; + if (alter_info->flags == ALTER_TABLE_REORG) + { + uint new_part_no, curr_part_no; + ulonglong max_rows= table->s->max_rows; + if (tab_part_info->part_type != HASH_PARTITION || + tab_part_info->use_default_no_partitions) + { + my_error(ER_REORG_NO_PARAM_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + new_part_no= table->file->get_default_no_partitions(max_rows); + curr_part_no= tab_part_info->no_parts; + if (new_part_no == curr_part_no) + { + /* + No change is needed, we will have the same number of partitions + after the change as before. Thus we can reply ok immediately + without any changes at all. + */ + DBUG_RETURN(fast_end_partition(thd, ULL(0), ULL(0), NULL, + TRUE, NULL, FALSE)); + } + else if (new_part_no > curr_part_no) + { + /* + We will add more partitions, we use the ADD PARTITION without + setting the flag for no default number of partitions + */ + alter_info->flags|= ALTER_ADD_PARTITION; + thd->lex->part_info->no_parts= new_part_no - curr_part_no; + } + else + { + /* + We will remove hash partitions, we use the COALESCE PARTITION + without setting the flag for no default number of partitions + */ + alter_info->flags|= ALTER_COALESCE_PARTITION; + alter_info->no_parts= curr_part_no - new_part_no; + } + } + if (table->s->db_type->alter_table_flags && + (!(flags= table->s->db_type->alter_table_flags(alter_info->flags)))) + { + my_error(ER_PARTITION_FUNCTION_FAILURE, MYF(0)); + DBUG_RETURN(1); + } + *fast_alter_partition= + ((flags & (HA_FAST_CHANGE_PARTITION | HA_PARTITION_ONE_PHASE)) != 0); + DBUG_PRINT("info", ("*fast_alter_partition: %d flags: 0x%x", + *fast_alter_partition, flags)); + if (alter_info->flags & ALTER_ADD_PARTITION) + { + /* + We start by moving the new partitions to the list of temporary + partitions. We will then check that the new partitions fit in the + partitioning scheme as currently set-up. + Partitions are always added at the end in ADD PARTITION. + */ + partition_info *alt_part_info= thd->lex->part_info; + uint no_new_partitions= alt_part_info->no_parts; + uint no_orig_partitions= tab_part_info->no_parts; + uint check_total_partitions= no_new_partitions + no_orig_partitions; + uint new_total_partitions= check_total_partitions; + /* + We allow quite a lot of values to be supplied by defaults, however we + must know the number of new partitions in this case. 
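+        For example (illustrative), "ALTER TABLE t1 ADD PARTITION
+        PARTITIONS 2" on a table that currently has four hash partitions
+        gives no_new_partitions= 2, no_orig_partitions= 4 and
+        check_total_partitions= 6.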
+ */ + if (thd->lex->no_write_to_binlog && + tab_part_info->part_type != HASH_PARTITION) + { + my_error(ER_NO_BINLOG_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_new_partitions == 0) + { + my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (tab_part_info->is_sub_partitioned()) + { + if (alt_part_info->no_subparts == 0) + alt_part_info->no_subparts= tab_part_info->no_subparts; + else if (alt_part_info->no_subparts != tab_part_info->no_subparts) + { + my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= new_total_partitions* + alt_part_info->no_subparts; + } + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } + alt_part_info->part_type= tab_part_info->part_type; + if (alt_part_info->set_up_defaults_for_partitioning(table->file, + ULL(0), + tab_part_info->no_parts)) + { + DBUG_RETURN(TRUE); + } +/* +Handling of on-line cases: + +ADD PARTITION for RANGE/LIST PARTITIONING: +------------------------------------------ +For range and list partitions add partition is simply adding a +new empty partition to the table. If the handler support this we +will use the simple method of doing this. The figure below shows +an example of this and the states involved in making this change. + +Existing partitions New added partitions +------ ------ ------ ------ | ------ ------ +| | | | | | | | | | | | | +| p0 | | p1 | | p2 | | p3 | | | p4 | | p5 | +------ ------ ------ ------ | ------ ------ +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_TO_BE_ADDED*2 +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_ADDED*2 + +The first line is the states before adding the new partitions and the +second line is after the new partitions are added. All the partitions are +in the partitions list, no partitions are placed in the temp_partitions +list. + +ADD PARTITION for HASH PARTITIONING +----------------------------------- +This little figure tries to show the various partitions involved when +adding two new partitions to a linear hash based partitioned table with +four partitions to start with, which lists are used and the states they +pass through. Adding partitions to a normal hash based is similar except +that it is always all the existing partitions that are reorganised not +only a subset of them. + +Existing partitions New added partitions +------ ------ ------ ------ | ------ ------ +| | | | | | | | | | | | | +| p0 | | p1 | | p2 | | p3 | | | p4 | | p5 | +------ ------ ------ ------ | ------ ------ +PART_CHANGED PART_CHANGED PART_NORMAL PART_NORMAL PART_TO_BE_ADDED +PART_IS_CHANGED*2 PART_NORMAL PART_NORMAL PART_IS_ADDED +PART_NORMAL PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_ADDED + +Reorganised existing partitions +------ ------ +| | | | +| p0'| | p1'| +------ ------ + +p0 - p5 will be in the partitions list of partitions. +p0' and p1' will actually not exist as separate objects, there presence can +be deduced from the state of the partition and also the names of those +partitions can be deduced this way. + +After adding the partitions and copying the partition data to p0', p1', +p4 and p5 from p0 and p1 the states change to adapt for the new situation +where p0 and p1 is dropped and replaced by p0' and p1' and the new p4 and +p5 are in the table again. 
+ +The first line above shows the states of the partitions before we start +adding and copying partitions, the second after completing the adding +and copying and finally the third line after also dropping the partitions +that are reorganised. +*/ + if (*fast_alter_partition && + tab_part_info->part_type == HASH_PARTITION) + { + uint part_no= 0, start_part= 1, start_sec_part= 1; + uint end_part= 0, end_sec_part= 0; + uint upper_2n= tab_part_info->linear_hash_mask + 1; + uint lower_2n= upper_2n >> 1; + bool all_parts= TRUE; + if (tab_part_info->linear_hash_ind && + no_new_partitions < upper_2n) + { + /* + An analysis of which parts needs reorganisation shows that it is + divided into two intervals. The first interval is those parts + that are reorganised up until upper_2n - 1. From upper_2n and + onwards it starts again from partition 0 and goes on until + it reaches p(upper_2n - 1). If the last new partition reaches + beyond upper_2n - 1 then the first interval will end with + p(lower_2n - 1) and start with p(no_orig_partitions - lower_2n). + If lower_2n partitions are added then p0 to p(lower_2n - 1) will + be reorganised which means that the two interval becomes one + interval at this point. Thus only when adding less than + lower_2n partitions and going beyond a total of upper_2n we + actually get two intervals. + + To exemplify this assume we have 6 partitions to start with and + add 1, 2, 3, 5, 6, 7, 8, 9 partitions. + The first to add after p5 is p6 = 110 in bit numbers. Thus we + can see that 10 = p2 will be partition to reorganise if only one + partition. + If 2 partitions are added we reorganise [p2, p3]. Those two + cases are covered by the second if part below. + If 3 partitions are added we reorganise [p2, p3] U [p0,p0]. This + part is covered by the else part below. + If 5 partitions are added we get [p2,p3] U [p0, p2] = [p0, p3]. + This is covered by the first if part where we need the max check + to here use lower_2n - 1. + If 7 partitions are added we get [p2,p3] U [p0, p4] = [p0, p4]. + This is covered by the first if part but here we use the first + calculated end_part. + Finally with 9 new partitions we would also reorganise p6 if we + used the method below but we cannot reorganise more partitions + than what we had from the start and thus we simply set all_parts + to TRUE. In this case we don't get into this if-part at all. + */ + all_parts= FALSE; + if (no_new_partitions >= lower_2n) + { + /* + In this case there is only one interval since the two intervals + overlap and this starts from zero to last_part_no - upper_2n + */ + start_part= 0; + end_part= new_total_partitions - (upper_2n + 1); + end_part= max(lower_2n - 1, end_part); + } + else if (new_total_partitions <= upper_2n) + { + /* + Also in this case there is only one interval since we are not + going over a 2**n boundary + */ + start_part= no_orig_partitions - lower_2n; + end_part= start_part + (no_new_partitions - 1); + } + else + { + /* We have two non-overlapping intervals since we are not + passing a 2**n border and we have not at least lower_2n + new parts that would ensure that the intervals become + overlapping. 
+ */ + start_part= no_orig_partitions - lower_2n; + end_part= upper_2n - 1; + start_sec_part= 0; + end_sec_part= new_total_partitions - (upper_2n + 1); + } + } + List_iterator<partition_element> tab_it(tab_part_info->partitions); + part_no= 0; + do + { + partition_element *p_elem= tab_it++; + if (all_parts || + (part_no >= start_part && part_no <= end_part) || + (part_no >= start_sec_part && part_no <= end_sec_part)) + { + p_elem->part_state= PART_CHANGED; + } + } while (++part_no < no_orig_partitions); + } + /* + Need to concatenate the lists here to make it possible to check the + partition info for correctness using check_partition_info. + For on-line add partition we set the state of this partition to + PART_TO_BE_ADDED to ensure that it is known that it is not yet + usable (becomes usable when partition is created and the switch of + partition configuration is made. + */ + { + List_iterator<partition_element> alt_it(alt_part_info->partitions); + uint part_count= 0; + do + { + partition_element *part_elem= alt_it++; + if (*fast_alter_partition) + part_elem->part_state= PART_TO_BE_ADDED; + if (tab_part_info->partitions.push_back(part_elem)) + { + mem_alloc_error(1); + DBUG_RETURN(TRUE); + } + } while (++part_count < no_new_partitions); + tab_part_info->no_parts+= no_new_partitions; + } + /* + If we specify partitions explicitly we don't use defaults anymore. + Using ADD PARTITION also means that we don't have the default number + of partitions anymore. We use this code also for Table reorganisations + and here we don't set any default flags to FALSE. + */ + if (!(alter_info->flags & ALTER_TABLE_REORG)) + { + if (!alt_part_info->use_default_partitions) + { + DBUG_PRINT("info", ("part_info= %x", tab_part_info)); + tab_part_info->use_default_partitions= FALSE; + } + tab_part_info->use_default_no_partitions= FALSE; + } + } + else if (alter_info->flags == ALTER_DROP_PARTITION) + { + /* + Drop a partition from a range partition and list partitioning is + always safe and can be made more or less immediate. It is necessary + however to ensure that the partition to be removed is safely removed + and that REPAIR TABLE can remove the partition if for some reason the + command to drop the partition failed in the middle. + */ + uint part_count= 0; + uint no_parts_dropped= alter_info->partition_names.elements; + uint no_parts_found= 0; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); + DBUG_RETURN(TRUE); + } + if (no_parts_dropped >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + do + { + partition_element *part_elem= part_it++; + if (is_name_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + /* + Set state to indicate that the partition is to be dropped. 
+ */ + no_parts_found++; + part_elem->part_state= PART_TO_BE_DROPPED; + } + } while (++part_count < tab_part_info->no_parts); + if (no_parts_found != no_parts_dropped) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), "DROP"); + DBUG_RETURN(TRUE); + } + if (table->file->is_fk_defined_on_table_or_index(MAX_KEY)) + { + my_error(ER_ROW_IS_REFERENCED, MYF(0)); + DBUG_RETURN(TRUE); + } + } + else if ((alter_info->flags & ALTER_OPTIMIZE_PARTITION) || + (alter_info->flags & ALTER_ANALYZE_PARTITION) || + (alter_info->flags & ALTER_CHECK_PARTITION) || + (alter_info->flags & ALTER_REPAIR_PARTITION) || + (alter_info->flags & ALTER_REBUILD_PARTITION)) + { + uint no_parts_opt= alter_info->partition_names.elements; + uint part_count= 0; + uint no_parts_found= 0; + List_iterator<partition_element> part_it(tab_part_info->partitions); + + do + { + partition_element *part_elem= part_it++; + if ((alter_info->flags & ALTER_ALL_PARTITION) || + (is_name_in_list(part_elem->partition_name, + alter_info->partition_names))) + { + /* + Mark the partition as a partition to be "changed" by + analyzing/optimizing/rebuilding/checking/repairing + */ + no_parts_found++; + part_elem->part_state= PART_CHANGED; + } + } while (++part_count < tab_part_info->no_parts); + if (no_parts_found != no_parts_opt && + (!(alter_info->flags & ALTER_ALL_PARTITION))) + { + const char *ptr; + if (alter_info->flags & ALTER_OPTIMIZE_PARTITION) + ptr= "OPTIMIZE"; + else if (alter_info->flags & ALTER_ANALYZE_PARTITION) + ptr= "ANALYZE"; + else if (alter_info->flags & ALTER_CHECK_PARTITION) + ptr= "CHECK"; + else if (alter_info->flags & ALTER_REPAIR_PARTITION) + ptr= "REPAIR"; + else + ptr= "REBUILD"; + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), ptr); + DBUG_RETURN(TRUE); + } + } + else if (alter_info->flags & ALTER_COALESCE_PARTITION) + { + uint no_parts_coalesced= alter_info->no_parts; + uint no_parts_remain= tab_part_info->no_parts - no_parts_coalesced; + List_iterator<partition_element> part_it(tab_part_info->partitions); + if (tab_part_info->part_type != HASH_PARTITION) + { + my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced == 0) + { + my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } + if (no_parts_coalesced >= tab_part_info->no_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + DBUG_RETURN(TRUE); + } +/* +Online handling: +COALESCE PARTITION: +------------------- +The figure below shows the manner in which partitions are handled when +performing an on-line coalesce partition and which states they go through +at start, after adding and copying partitions and finally after dropping +the partitions to drop. The figure shows an example using four partitions +to start with, using linear hash and coalescing one partition (always the +last partition). + +Using linear hash then all remaining partitions will have a new reorganised +part. + +Existing partitions Coalesced partition +------ ------ ------ | ------ +| | | | | | | | | +| p0 | | p1 | | p2 | | | p3 | +------ ------ ------ | ------ +PART_NORMAL PART_CHANGED PART_NORMAL PART_REORGED_DROPPED +PART_NORMAL PART_IS_CHANGED PART_NORMAL PART_TO_BE_DROPPED +PART_NORMAL PART_NORMAL PART_NORMAL PART_IS_DROPPED + +Reorganised existing partitions + ------ + | | + | p1'| + ------ + +p0 - p3 is in the partitions list. +The p1' partition will actually not be in any list it is deduced from the +state of p1. 
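+
+To tie the figure to the code below (illustrative): with no_parts= 4 and
+linear_hash_mask= 3 we have upper_2n= 4 and lower_2n= 2; coalescing one
+partition leaves no_parts_remain= 3 >= lower_2n, so start_part= end_part= 1,
+which is why only p1 is marked PART_CHANGED.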
+*/ + { + uint part_count= 0, start_part= 1, start_sec_part= 1; + uint end_part= 0, end_sec_part= 0; + bool all_parts= TRUE; + if (*fast_alter_partition && + tab_part_info->linear_hash_ind) + { + uint upper_2n= tab_part_info->linear_hash_mask + 1; + uint lower_2n= upper_2n >> 1; + all_parts= FALSE; + if (no_parts_coalesced >= lower_2n) + { + all_parts= TRUE; + } + else if (no_parts_remain >= lower_2n) + { + end_part= tab_part_info->no_parts - (lower_2n + 1); + start_part= no_parts_remain - lower_2n; + } + else + { + start_part= 0; + end_part= tab_part_info->no_parts - (lower_2n + 1); + end_sec_part= (lower_2n >> 1) - 1; + start_sec_part= end_sec_part - (lower_2n - (no_parts_remain + 1)); + } + } + do + { + partition_element *p_elem= part_it++; + if (*fast_alter_partition && + (all_parts || + (part_count >= start_part && part_count <= end_part) || + (part_count >= start_sec_part && part_count <= end_sec_part))) + p_elem->part_state= PART_CHANGED; + if (++part_count > no_parts_remain) + { + if (*fast_alter_partition) + p_elem->part_state= PART_REORGED_DROPPED; + else + part_it.remove(); + } + } while (part_count < tab_part_info->no_parts); + tab_part_info->no_parts= no_parts_remain; + } + if (!(alter_info->flags & ALTER_TABLE_REORG)) + tab_part_info->use_default_no_partitions= FALSE; + } + else if (alter_info->flags == ALTER_REORGANIZE_PARTITION) + { + /* + Reorganise partitions takes a number of partitions that are next + to each other (at least for RANGE PARTITIONS) and then uses those + to create a set of new partitions. So data is copied from those + partitions into the new set of partitions. Those new partitions + can have more values in the LIST value specifications or less both + are allowed. The ranges can be different but since they are + changing a set of consecutive partitions they must cover the same + range as those changed from. + This command can be used on RANGE and LIST partitions. + */ + uint no_parts_reorged= alter_info->partition_names.elements; + uint no_parts_new= thd->lex->part_info->partitions.elements; + partition_info *alt_part_info= thd->lex->part_info; + uint check_total_partitions; + if (no_parts_reorged > tab_part_info->no_parts) + { + my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0)); + DBUG_RETURN(TRUE); + } + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION) && + (no_parts_new != no_parts_reorged)) + { + my_error(ER_REORG_HASH_ONLY_ON_SAME_NO, MYF(0)); + DBUG_RETURN(TRUE); + } + check_total_partitions= tab_part_info->no_parts + no_parts_new; + check_total_partitions-= no_parts_reorged; + if (check_total_partitions > MAX_PARTITIONS) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0)); + DBUG_RETURN(TRUE); + } +/* +Online handling: +REORGANIZE PARTITION: +--------------------- +The figure exemplifies the handling of partitions, their state changes and +how they are organised. It exemplifies four partitions where two of the +partitions are reorganised (p1 and p2) into two new partitions (p4 and p5). +The reason of this change could be to change range limits, change list +values or for hash partitions simply reorganise the partition which could +also involve moving them to new disks or new node groups (MySQL Cluster). 
+ +Existing partitions +------ ------ ------ ------ +| | | | | | | | +| p0 | | p1 | | p2 | | p3 | +------ ------ ------ ------ +PART_NORMAL PART_TO_BE_REORGED PART_NORMAL +PART_NORMAL PART_TO_BE_DROPPED PART_NORMAL +PART_NORMAL PART_IS_DROPPED PART_NORMAL + +Reorganised new partitions (replacing p1 and p2) +------ ------ +| | | | +| p4 | | p5 | +------ ------ +PART_TO_BE_ADDED +PART_IS_ADDED +PART_IS_ADDED + +All unchanged partitions and the new partitions are in the partitions list +in the order they will have when the change is completed. The reorganised +partitions are placed in the temp_partitions list. PART_IS_ADDED is only a +temporary state not written in the frm file. It is used to ensure we write +the generated partition syntax in a correct manner. +*/ + { + List_iterator<partition_element> tab_it(tab_part_info->partitions); + uint part_count= 0; + bool found_first= FALSE; + bool found_last= FALSE; + bool is_last_partition_reorged; + uint drop_count= 0; + longlong tab_max_range= 0, alt_max_range= 0; + do + { + partition_element *part_elem= tab_it++; + is_last_partition_reorged= FALSE; + if (is_name_in_list(part_elem->partition_name, + alter_info->partition_names)) + { + is_last_partition_reorged= TRUE; + drop_count++; + tab_max_range= part_elem->range_value; + if (*fast_alter_partition && + tab_part_info->temp_partitions.push_back(part_elem)) + { + mem_alloc_error(1); + DBUG_RETURN(TRUE); + } + if (*fast_alter_partition) + part_elem->part_state= PART_TO_BE_REORGED; + if (!found_first) + { + uint alt_part_count= 0; + found_first= TRUE; + List_iterator<partition_element> + alt_it(alt_part_info->partitions); + do + { + partition_element *alt_part_elem= alt_it++; + alt_max_range= alt_part_elem->range_value; + if (*fast_alter_partition) + alt_part_elem->part_state= PART_TO_BE_ADDED; + if (alt_part_count == 0) + tab_it.replace(alt_part_elem); + else + tab_it.after(alt_part_elem); + } while (++alt_part_count < no_parts_new); + } + else if (found_last) + { + my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0)); + DBUG_RETURN(TRUE); + } + else + tab_it.remove(); + } + else + { + if (found_first) + found_last= TRUE; + } + } while (++part_count < tab_part_info->no_parts); + if (drop_count != no_parts_reorged) + { + my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), "REORGANIZE"); + DBUG_RETURN(TRUE); + } + if (tab_part_info->part_type == RANGE_PARTITION && + ((is_last_partition_reorged && + alt_max_range < tab_max_range) || + (!is_last_partition_reorged && + alt_max_range != tab_max_range))) + { + /* + For range partitioning the total resulting range before and + after the change must be the same except in one case. This is + when the last partition is reorganised, in this case it is + acceptable to increase the total range. + The reason is that it is not allowed to have "holes" in the + middle of the ranges and thus we should not allow to reorganise + to create "holes". Also we should not allow using REORGANIZE + to drop data. 
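+          For example (illustrative), reorganising a middle partition
+          p1 VALUES LESS THAN (100) into two partitions ending at LESS
+          THAN (50) and LESS THAN (100) is allowed, whereas ending at
+          LESS THAN (90) would create a hole and is refused; only when
+          the last partition is reorganised may the new upper bound
+          exceed the old one.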
+ */ + my_error(ER_REORG_OUTSIDE_RANGE, MYF(0)); + DBUG_RETURN(TRUE); + } + tab_part_info->no_parts= check_total_partitions; + } + } + else + { + DBUG_ASSERT(FALSE); + } + *partition_changed= TRUE; + thd->lex->part_info= tab_part_info; + if (alter_info->flags == ALTER_ADD_PARTITION || + alter_info->flags == ALTER_REORGANIZE_PARTITION) + { + if (check_partition_info(tab_part_info, (handlerton**)NULL, + table->file, ULL(0))) + { + DBUG_RETURN(TRUE); + } + } + } + else + { + /* + When thd->lex->part_info has a reference to a partition_info the + ALTER TABLE contained a definition of a partitioning. + + Case I: + If there was a partition before and there is a new one defined. + We use the new partitioning. The new partitioning is already + defined in the correct variable so no work is needed to + accomplish this. + We do however need to update partition_changed to ensure that not + only the frm file is changed in the ALTER TABLE command. + + Case IIa: + There was a partitioning before and there is no new one defined. + Also the user has not specified an explicit engine to use. + + We use the old partitioning also for the new table. We do this + by assigning the partition_info from the table loaded in + open_ltable to the partition_info struct used by mysql_create_table + later in this method. + + Case IIb: + There was a partitioning before and there is no new one defined. + The user has specified an explicit engine to use. + + Since the user has specified an explicit engine to use we override + the old partitioning info and create a new table using the specified + engine. This is the reason for the extra check if old and new engine + is equal. + In this case the partition also is changed. + + Case III: + There was no partitioning before altering the table, there is + partitioning defined in the altered table. Use the new partitioning. + No work needed since the partitioning info is already in the + correct variable. + + In this case we discover one case where the new partitioning is using + the same partition function as the default (PARTITION BY KEY or + PARTITION BY LINEAR KEY with the list of fields equal to the primary + key fields OR PARTITION BY [LINEAR] KEY() for tables without primary + key) + Also here partition has changed and thus a new table must be + created. + + Case IV: + There was no partitioning before and no partitioning defined. + Obviously no work needed. + */ + if (table->part_info) + { + if (!thd->lex->part_info && + create_info->db_type == old_db_type) + thd->lex->part_info= table->part_info; + } + if (thd->lex->part_info) + { + /* + Need to cater for engine types that can handle partition without + using the partition handler. 
+ */ + if (thd->lex->part_info != table->part_info) + *partition_changed= TRUE; + if (create_info->db_type == &partition_hton) + { + if (table->part_info) + { + thd->lex->part_info->default_engine_type= + table->part_info->default_engine_type; + } + else + { + thd->lex->part_info->default_engine_type= + ha_checktype(thd, DB_TYPE_DEFAULT, FALSE, FALSE); + } + } + else + { + bool is_native_partitioned= FALSE; + partition_info *part_info= thd->lex->part_info; + part_info->default_engine_type= create_info->db_type; + if (check_native_partitioned(create_info, &is_native_partitioned, + part_info, thd)) + { + DBUG_RETURN(TRUE); + } + if (!is_native_partitioned) + { + DBUG_ASSERT(create_info->db_type != &default_hton); + create_info->db_type= &partition_hton; + } + } + DBUG_PRINT("info", ("default_db_type = %s", + thd->lex->part_info->default_engine_type->name)); + } + } + DBUG_RETURN(FALSE); +} + + +/* + Change partitions, used to implement ALTER TABLE ADD/REORGANIZE/COALESCE + partitions. This method is used to implement both single-phase and multi- + phase implementations of ADD/REORGANIZE/COALESCE partitions. + + SYNOPSIS + mysql_change_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + + DESCRIPTION + Request handler to add partitions as set in states of the partition + + Elements of the lpt parameters used: + create_info Create information used to create partitions + db Database name + table_name Table name + copied Output parameter where number of copied + records are added + deleted Output parameter where number of deleted + records are added +*/ + +static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + char path[FN_REFLEN+1]; + DBUG_ENTER("mysql_change_partitions"); + + build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, ""); + DBUG_RETURN(lpt->table->file->change_partitions(lpt->create_info, path, + &lpt->copied, + &lpt->deleted, + lpt->pack_frm_data, + lpt->pack_frm_len)); +} + + +/* + Rename partitions in an ALTER TABLE of partitions + + SYNOPSIS + mysql_rename_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + + DESCRIPTION + Request handler to rename partitions as set in states of the partition + + Parameters used: + db Database name + table_name Table name +*/ + +static bool mysql_rename_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + char path[FN_REFLEN+1]; + DBUG_ENTER("mysql_rename_partitions"); + + build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, ""); + DBUG_RETURN(lpt->table->file->rename_partitions(path)); +} + + +/* + Drop partitions in an ALTER TABLE of partitions + + SYNOPSIS + mysql_drop_partitions() + lpt Struct containing parameters + + RETURN VALUES + TRUE Failure + FALSE Success + DESCRIPTION + Drop the partitions marked with PART_TO_BE_DROPPED state and remove + those partitions from the list. 
+ + Parameters used: + table Table object + db Database name + table_name Table name +*/ + +static bool mysql_drop_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +{ + char path[FN_REFLEN+1]; + partition_info *part_info= lpt->table->part_info; + List_iterator<partition_element> part_it(part_info->partitions); + uint i= 0; + uint remove_count= 0; + DBUG_ENTER("mysql_drop_partitions"); + + build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, ""); + if (lpt->table->file->drop_partitions(path)) + { + DBUG_RETURN(TRUE); + } + do + { + partition_element *part_elem= part_it++; + if (part_elem->part_state == PART_IS_DROPPED) + { + part_it.remove(); + remove_count++; + } + } while (++i < part_info->no_parts); + part_info->no_parts-= remove_count; + DBUG_RETURN(FALSE); +} + + +/* + Actually perform the change requested by ALTER TABLE of partitions + previously prepared. + + SYNOPSIS + fast_alter_partition_table() + thd Thread object + table Table object + alter_info ALTER TABLE info + create_info Create info for CREATE TABLE + table_list List of the table involved + create_list The fields in the resulting table + key_list The keys in the resulting table + db Database name of new table + table_name Table name of new table + + RETURN VALUES + TRUE Error + FALSE Success + + DESCRIPTION + Perform all ALTER TABLE operations for partitioned tables that can be + performed fast without a full copy of the original table. +*/ + +uint fast_alter_partition_table(THD *thd, TABLE *table, + ALTER_INFO *alter_info, + HA_CREATE_INFO *create_info, + TABLE_LIST *table_list, + List<create_field> *create_list, + List<Key> *key_list, const char *db, + const char *table_name, + uint fast_alter_partition) +{ + /* Set-up struct used to write frm files */ + ulonglong copied= 0; + ulonglong deleted= 0; + partition_info *part_info= table->part_info; + ALTER_PARTITION_PARAM_TYPE lpt_obj; + ALTER_PARTITION_PARAM_TYPE *lpt= &lpt_obj; + bool written_bin_log= TRUE; + DBUG_ENTER("fast_alter_partition_table"); + + lpt->thd= thd; + lpt->create_info= create_info; + lpt->create_list= create_list; + lpt->key_list= key_list; + lpt->db_options= create_info->table_options; + if (create_info->row_type == ROW_TYPE_DYNAMIC) + lpt->db_options|= HA_OPTION_PACK_RECORD; + lpt->table= table; + lpt->key_info_buffer= 0; + lpt->key_count= 0; + lpt->db= db; + lpt->table_name= table_name; + lpt->copied= 0; + lpt->deleted= 0; + lpt->pack_frm_data= NULL; + lpt->pack_frm_len= 0; + thd->lex->part_info= part_info; + + if (alter_info->flags & ALTER_OPTIMIZE_PARTITION || + alter_info->flags & ALTER_ANALYZE_PARTITION || + alter_info->flags & ALTER_CHECK_PARTITION || + alter_info->flags & ALTER_REPAIR_PARTITION) + { + /* + In this case the user has specified that he wants a set of partitions + to be optimised and the partition engine can handle optimising + partitions natively without requiring a full rebuild of the + partitions. + + In this case it is enough to call optimise_partitions, there is no + need to change frm files or anything else. 
+ */ + written_bin_log= FALSE; + if (((alter_info->flags & ALTER_OPTIMIZE_PARTITION) && + (table->file->optimize_partitions(thd))) || + ((alter_info->flags & ALTER_ANALYZE_PARTITION) && + (table->file->analyze_partitions(thd))) || + ((alter_info->flags & ALTER_CHECK_PARTITION) && + (table->file->check_partitions(thd))) || + ((alter_info->flags & ALTER_REPAIR_PARTITION) && + (table->file->repair_partitions(thd)))) + { + fast_alter_partition_error_handler(lpt); + DBUG_RETURN(TRUE); + } + } + else if (fast_alter_partition & HA_PARTITION_ONE_PHASE) + { + /* + In the case where the engine supports one phase online partition + changes it is not necessary to have any exclusive locks. The + correctness is upheld instead by transactions being aborted if they + access the table after its partition definition has changed (if they + are still using the old partition definition). + + The handler is in this case responsible to ensure that all users + start using the new frm file after it has changed. To implement + one phase it is necessary for the handler to have the master copy + of the frm file and use discovery mechanisms to renew it. Thus + write frm will write the frm, pack the new frm and finally + the frm is deleted and the discovery mechanisms will either restore + back to the old or installing the new after the change is activated. + + Thus all open tables will be discovered that they are old, if not + earlier as soon as they try an operation using the old table. One + should ensure that this is checked already when opening a table, + even if it is found in the cache of open tables. + + change_partitions will perform all operations and it is the duty of + the handler to ensure that the frm files in the system gets updated + in synch with the changes made and if an error occurs that a proper + error handling is done. + + If the MySQL Server crashes at this moment but the handler succeeds + in performing the change then the binlog is not written for the + change. There is no way to solve this as long as the binlog is not + transactional and even then it is hard to solve it completely. + + The first approach here was to downgrade locks. Now a different approach + is decided upon. The idea is that the handler will have access to the + ALTER_INFO when store_lock arrives with TL_WRITE_ALLOW_READ. So if the + handler knows that this functionality can be handled with a lower lock + level it will set the lock level to TL_WRITE_ALLOW_WRITE immediately. + Thus the need to downgrade the lock disappears. + 1) Write the new frm, pack it and then delete it + 2) Perform the change within the handler + */ + if ((mysql_write_frm(lpt, WFRM_INITIAL_WRITE | WFRM_PACK_FRM)) || + (mysql_change_partitions(lpt))) + { + fast_alter_partition_error_handler(lpt); + DBUG_RETURN(TRUE); + } + } + else if (alter_info->flags == ALTER_DROP_PARTITION) + { + /* + Now after all checks and setting state on dropped partitions we can + start the actual dropping of the partitions. + + Drop partition is actually two things happening. The first is that + a lot of records are deleted. The second is that the behaviour of + subsequent updates and writes and deletes will change. The delete + part can be handled without any particular high lock level by + transactional engines whereas non-transactional engines need to + ensure that this change is done with an exclusive lock on the table. + The second part, the change of partitioning does however require + an exclusive lock to install the new partitioning as one atomic + operation. 
If this is not the case, it is possible for two + transactions to see the change in a different order than their + serialisation order. Thus we need an exclusive lock for both + transactional and non-transactional engines. + + For LIST partitions it could be possible to avoid the exclusive lock + (and for RANGE partitions if they didn't rearrange range definitions + after a DROP PARTITION) if one ensured that failed accesses to the + dropped partitions was aborted for sure (thus only possible for + transactional engines). + + 1) Lock the table in TL_WRITE_ONLY to ensure all other accesses to + the table have completed + 2) Write the new frm file where the partitions have changed but are + still remaining with the state PART_TO_BE_DROPPED + 3) Write the bin log + 4) Prepare MyISAM handlers for drop of partitions + 5) Ensure that any users that has opened the table but not yet + reached the abort lock do that before downgrading the lock. + 6) Drop the partitions + 7) Write the frm file that the partition has been dropped + 8) Wait until all accesses using the old frm file has completed + 9) Complete query + */ + if ((abort_and_upgrade_lock(lpt)) || + (mysql_write_frm(lpt, WFRM_INITIAL_WRITE)) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query, thd->query_length), FALSE)) || + (table->file->extra(HA_EXTRA_PREPARE_FOR_DELETE)) || + (close_open_tables_and_downgrade(lpt), FALSE) || + (mysql_drop_partitions(lpt)) || + (mysql_write_frm(lpt, WFRM_CREATE_HANDLER_FILES)) || + (mysql_wait_completed_table(lpt, table), FALSE)) + { + fast_alter_partition_error_handler(lpt); + DBUG_RETURN(TRUE); + } + } + else if ((alter_info->flags & ALTER_ADD_PARTITION) && + (part_info->part_type == RANGE_PARTITION || + part_info->part_type == LIST_PARTITION)) + { + /* + ADD RANGE/LIST PARTITIONS + In this case there are no tuples removed and no tuples are added. + Thus the operation is merely adding a new partition. Thus it is + necessary to perform the change as an atomic operation. Otherwise + someone reading without seeing the new partition could potentially + miss updates made by a transaction serialised before it that are + inserted into the new partition. + + 1) Write the new frm file where state of added partitions is + changed to PART_TO_BE_ADDED + 2) Add the new partitions + 3) Lock all partitions in TL_WRITE_ONLY to ensure that no users + are still using the old partitioning scheme. Wait until all + ongoing users have completed before progressing. + 4) Write a new frm file of the table where the partitions are added + to the table. + 5) Write binlog + 6) Wait until all accesses using the old frm file has completed + 7) Complete query + */ + if ((mysql_write_frm(lpt, WFRM_INITIAL_WRITE)) || + (mysql_change_partitions(lpt)) || + (abort_and_upgrade_lock(lpt)) || + (mysql_write_frm(lpt, WFRM_CREATE_HANDLER_FILES)) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query, thd->query_length), FALSE)) || + (close_open_tables_and_downgrade(lpt), FALSE)) + { + fast_alter_partition_error_handler(lpt); + DBUG_RETURN(TRUE); + } + } + else + { + /* + ADD HASH PARTITION/ + COALESCE PARTITION/ + REBUILD PARTITION/ + REORGANIZE PARTITION + + In this case all records are still around after the change although + possibly organised into new partitions, thus by ensuring that all + updates go to both the old and the new partitioning scheme we can + actually perform this operation lock-free. The only exception to + this is when REORGANIZE PARTITION adds/drops ranges. 
In this case + there needs to be an exclusive lock during the time when the range + changes occur. + This is only possible if the handler can ensure double-write for a + period. The double write will ensure that it doesn't matter where the + data is read from since both places are updated for writes. If such + double writing is not performed then it is necessary to perform the + change with the usual exclusive lock. With double writes it is even + possible to perform writes in parallel with the reorganisation of + partitions. + + Without double write procedure we get the following procedure. + The only difference with using double write is that we can downgrade + the lock to TL_WRITE_ALLOW_WRITE. Double write in this case only + double writes from old to new. If we had double writing in both + directions we could perform the change completely without exclusive + lock for HASH partitions. + Handlers that perform double writing during the copy phase can actually + use a lower lock level. This can be handled inside store_lock in the + respective handler. + + 1) Write the new frm file where state of added partitions is + changed to PART_TO_BE_ADDED and the reorganised partitions + are set in state PART_TO_BE_REORGED. + 2) Add the new partitions + Copy from the reorganised partitions to the new partitions + 3) Lock all partitions in TL_WRITE_ONLY to ensure that no users + are still using the old partitioning scheme. Wait until all + ongoing users have completed before progressing. + 4) Prepare MyISAM handlers for rename and delete of partitions + 5) Write a new frm file of the table where the partitions are + reorganised. + 6) Rename the reorged partitions such that they are no longer + used and rename those added to their real new names. + 7) Write bin log + 8) Wait until all accesses using the old frm file has completed + 9) Drop the reorganised partitions + 10)Write a new frm file of the table where the partitions are + reorganised. + 11)Wait until all accesses using the old frm file has completed + 12)Complete query + */ + + if ((mysql_write_frm(lpt, WFRM_INITIAL_WRITE)) || + (mysql_change_partitions(lpt)) || + (abort_and_upgrade_lock(lpt)) || + (mysql_write_frm(lpt, WFRM_CREATE_HANDLER_FILES)) || + (table->file->extra(HA_EXTRA_PREPARE_FOR_DELETE)) || + (mysql_rename_partitions(lpt)) || + ((!thd->lex->no_write_to_binlog) && + (write_bin_log(thd, FALSE, + thd->query, thd->query_length), FALSE)) || + (close_open_tables_and_downgrade(lpt), FALSE) || + (mysql_drop_partitions(lpt)) || + (mysql_write_frm(lpt, 0UL)) || + (mysql_wait_completed_table(lpt, table), FALSE)) + { + fast_alter_partition_error_handler(lpt); + DBUG_RETURN(TRUE); + } + } + /* + A final step is to write the query to the binlog and send ok to the + user + */ + DBUG_RETURN(fast_end_partition(thd, lpt->copied, lpt->deleted, + table_list, FALSE, lpt, + written_bin_log)); +} +#endif + + +/* + Prepare for calling val_int on partition function by setting fields to + point to the record where the values of the PF-fields are stored. + + SYNOPSIS + set_field_ptr() + ptr Array of fields to change ptr + new_buf New record pointer + old_buf Old record pointer + + DESCRIPTION + Set ptr in field objects of field array to refer to new_buf record + instead of previously old_buf. Used before calling val_int and after + it is used to restore pointers to table->record[0]. + This routine is placed outside of partition code since it can be useful + also for other programs. 
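+
+    A typical call sequence is (an illustrative sketch, names assumed):
+      set_field_ptr(part_info->full_part_field_array, buf, table->record[0]);
+      ... evaluate the partition function with val_int() ...
+      set_field_ptr(part_info->full_part_field_array, table->record[0], buf);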
+*/
+
+void set_field_ptr(Field **ptr, const byte *new_buf,
+                   const byte *old_buf)
+{
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_field_ptr");
+
+  do
+  {
+    (*ptr)->move_field_offset(diff);
+  } while (*(++ptr));
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Prepare for calling val_int on partition function by setting fields to
+  point to the record where the values of the PF-fields are stored.
+  This variant works on a key_part reference.
+  It is not required that all fields are NOT NULL fields.
+
+  SYNOPSIS
+    set_key_field_ptr()
+    key_info            key info with a set of fields to change ptr
+    new_buf             New record pointer
+    old_buf             Old record pointer
+
+  DESCRIPTION
+    Set ptr in field objects of the field array to refer to the new_buf
+    record instead of the previous old_buf. Used before calling val_int
+    and afterwards to restore the pointers to table->record[0].
+    This routine is placed outside of partition code since it can be useful
+    also for other programs.
+*/
+
+void set_key_field_ptr(KEY *key_info, const byte *new_buf,
+                       const byte *old_buf)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  uint key_parts= key_info->key_parts;
+  uint i= 0;
+  my_ptrdiff_t diff= (new_buf - old_buf);
+  DBUG_ENTER("set_key_field_ptr");
+
+  do
+  {
+    key_part->field->move_field_offset(diff);
+    key_part++;
+  } while (++i < key_parts);
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  SYNOPSIS
+    mem_alloc_error()
+    size                Size of memory attempted to allocate
+
+  RETURN VALUES
+    None
+
+  DESCRIPTION
+    A routine to use in the many places in the code where a memory
+    allocation error can happen. There is a tremendous number of such
+    places, and they all need a simple routine that signals this error.
+*/
+
+void mem_alloc_error(size_t size)
+{
+  my_error(ER_OUTOFMEMORY, MYF(0), size);
+}
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+/*
+  Return a comma-separated list of used partitions in the provided string
+
+  SYNOPSIS
+    make_used_partitions_str()
+    part_info  IN  Partitioning info
+    parts_str  OUT The string to fill
+
+  DESCRIPTION
+    Generate a list of used partitions (from bits in
+    part_info->used_partitions bitmap), and store it into the provided
+    String object.
+
+  NOTE
+    The produced string must not be longer than MAX_PARTITIONS * (1 + FN_LEN).
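+
+    For example (illustrative), for a table with partitions p0, p1 that
+    each have subpartitions sp0, sp1, where only the (p1, sp0)
+    subpartition is marked as used, the produced string is "p1_sp0".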
+*/
+
+void make_used_partitions_str(partition_info *part_info, String *parts_str)
+{
+  parts_str->length(0);
+  partition_element *pe;
+  uint partition_id= 0;
+  List_iterator<partition_element> it(part_info->partitions);
+
+  if (part_info->is_sub_partitioned())
+  {
+    partition_element *head_pe;
+    while ((head_pe= it++))
+    {
+      List_iterator<partition_element> it2(head_pe->subpartitions);
+      while ((pe= it2++))
+      {
+        if (bitmap_is_set(&part_info->used_partitions, partition_id))
+        {
+          if (parts_str->length())
+            parts_str->append(',');
+          parts_str->append(head_pe->partition_name,
+                            strlen(head_pe->partition_name),
+                            system_charset_info);
+          parts_str->append('_');
+          parts_str->append(pe->partition_name,
+                            strlen(pe->partition_name),
+                            system_charset_info);
+        }
+        partition_id++;
+      }
+    }
+  }
+  else
+  {
+    while ((pe= it++))
+    {
+      if (bitmap_is_set(&part_info->used_partitions, partition_id))
+      {
+        if (parts_str->length())
+          parts_str->append(',');
+        parts_str->append(pe->partition_name, strlen(pe->partition_name),
+                          system_charset_info);
+      }
+      partition_id++;
+    }
+  }
+}
+#endif
+
+/****************************************************************************
+ * Partition interval analysis support
+ ***************************************************************************/
+
+/*
+  Set up partition_info::* members related to partitioning range analysis
+
+  SYNOPSIS
+    set_up_range_analysis_info()
+      part_info  Partitioning info structure
+
+  DESCRIPTION
+    Assuming that the passed partition_info structure already has correct
+    values for members that specify [sub]partitioning type, table fields,
+    and functions, set up partition_info::* members that are related to
+    Partitioning Interval Analysis (see get_partitions_in_range_iter for
+    its definition).
+
+  IMPLEMENTATION
+    There are two available interval analyzer functions:
+    (1) get_part_iter_for_interval_via_mapping
+    (2) get_part_iter_for_interval_via_walking
+
+    They both have limited applicability:
+    (1) is applicable for "PARTITION BY <RANGE|LIST>(func(t.field))", where
+    func is a monotonic function.
+
+    (2) is applicable for
+      "[SUB]PARTITION BY <any-partitioning-type>(any_func(t.integer_field))"
+
+    If both are applicable, (1) is preferred over (2).
+
+    This function sets part_info::get_part_iter_for_interval according to
+    these criteria, and also sets some auxiliary fields that the function
+    uses.
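+
+    For example (illustrative), "PARTITION BY RANGE (YEAR(d))" can use (1)
+    since YEAR is monotonically increasing, while "PARTITION BY HASH (a)"
+    on an integer column a can only use (2).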
+*/ +#ifdef WITH_PARTITION_STORAGE_ENGINE +static void set_up_range_analysis_info(partition_info *part_info) +{ + enum_monotonicity_info minfo; + + /* Set the catch-all default */ + part_info->get_part_iter_for_interval= NULL; + part_info->get_subpart_iter_for_interval= NULL; + + /* + Check if get_part_iter_for_interval_via_mapping() can be used for + partitioning + */ + switch (part_info->part_type) { + case RANGE_PARTITION: + case LIST_PARTITION: + minfo= part_info->part_expr->get_monotonicity_info(); + if (minfo != NON_MONOTONIC) + { + part_info->range_analysis_include_bounds= + test(minfo == MONOTONIC_INCREASING); + part_info->get_part_iter_for_interval= + get_part_iter_for_interval_via_mapping; + goto setup_subparts; + } + default: + ; + } + + /* + Check get_part_iter_for_interval_via_walking() can be used for + partitioning + */ + if (part_info->no_part_fields == 1) + { + Field *field= part_info->part_field_array[0]; + switch (field->type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + part_info->get_part_iter_for_interval= + get_part_iter_for_interval_via_walking; + break; + default: + ; + } + } + +setup_subparts: + /* + Check get_part_iter_for_interval_via_walking() can be used for + subpartitioning + */ + if (part_info->no_subpart_fields == 1) + { + Field *field= part_info->subpart_field_array[0]; + switch (field->type()) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + case MYSQL_TYPE_LONGLONG: + part_info->get_subpart_iter_for_interval= + get_part_iter_for_interval_via_walking; + break; + default: + ; + } + } +} + + +typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, + bool include_endpoint); + +/* + Partitioning Interval Analysis: Initialize the iterator for "mapping" case + + SYNOPSIS + get_part_iter_for_interval_via_mapping() + part_info Partition info + is_subpart TRUE - act for subpartitioning + FALSE - act for partitioning + min_value minimum field value, in opt_range key format. + max_value minimum field value, in opt_range key format. + flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE, + NO_MAX_RANGE. + part_iter Iterator structure to be initialized + + DESCRIPTION + Initialize partition set iterator to walk over the interval in + ordered-array-of-partitions (for RANGE partitioning) or + ordered-array-of-list-constants (for LIST partitioning) space. + + IMPLEMENTATION + This function is used when partitioning is done by + <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in + t.field space into a sub-array of partition_info::range_int_array or + partition_info::list_array (see get_partition_id_range_for_endpoint, + get_list_array_idx_for_endpoint for details). + + The function performs this interval mapping, and sets the iterator to + traverse the sub-array and return appropriate partitions. + + RETURN + 0 - No matching partitions (iterator not initialized) + 1 - Ok, iterator intialized for traversal of matching partitions. 
+ -1 - All partitions would match (iterator not initialized) +*/ + +int get_part_iter_for_interval_via_mapping(partition_info *part_info, + bool is_subpart, + char *min_value, char *max_value, + uint flags, + PARTITION_ITERATOR *part_iter) +{ + DBUG_ASSERT(!is_subpart); + Field *field= part_info->part_field_array[0]; + uint32 max_endpoint_val; + get_endpoint_func get_endpoint; + uint field_len= field->pack_length_in_rec(); + + if (part_info->part_type == RANGE_PARTITION) + { + get_endpoint= get_partition_id_range_for_endpoint; + max_endpoint_val= part_info->no_parts; + part_iter->get_next= get_next_partition_id_range; + } + else if (part_info->part_type == LIST_PARTITION) + { + get_endpoint= get_list_array_idx_for_endpoint; + max_endpoint_val= part_info->no_list_values; + part_iter->get_next= get_next_partition_id_list; + part_iter->part_info= part_info; + } + else + DBUG_ASSERT(0); + + /* Find minimum */ + if (flags & NO_MIN_RANGE) + part_iter->part_nums.start= 0; + else + { + /* + Store the interval edge in the record buffer, and call the + function that maps the edge in table-field space to an edge + in ordered-set-of-partitions (for RANGE partitioning) or + index-in-ordered-array-of-list-constants (for LIST) space. + */ + store_key_image_to_rec(field, min_value, field_len); + bool include_endp= part_info->range_analysis_include_bounds || + !test(flags & NEAR_MIN); + part_iter->part_nums.start= get_endpoint(part_info, 1, include_endp); + if (part_iter->part_nums.start == max_endpoint_val) + return 0; /* No partitions */ + } + + /* Find maximum, do the same as above but for right interval bound */ + if (flags & NO_MAX_RANGE) + part_iter->part_nums.end= max_endpoint_val; + else + { + store_key_image_to_rec(field, max_value, field_len); + bool include_endp= part_info->range_analysis_include_bounds || + !test(flags & NEAR_MAX); + part_iter->part_nums.end= get_endpoint(part_info, 0, include_endp); + if (part_iter->part_nums.start== part_iter->part_nums.end) + return 0; /* No partitions */ + } + return 1; /* Ok, iterator initialized */ +} + + +/* See get_part_iter_for_interval_via_walking for definition of what this is */ +#define MAX_RANGE_TO_WALK 10 + + +/* + Partitioning Interval Analysis: Initialize iterator to walk field interval + + SYNOPSIS + get_part_iter_for_interval_via_walking() + part_info Partition info + is_subpart TRUE - act for subpartitioning + FALSE - act for partitioning + min_value minimum field value, in opt_range key format. + max_value minimum field value, in opt_range key format. + flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE, + NO_MAX_RANGE. + part_iter Iterator structure to be initialized + + DESCRIPTION + Initialize partition set iterator to walk over interval in integer field + space. That is, for "const1 <=? t.field <=? const2" interval, initialize + the iterator to return a set of [sub]partitions obtained with the + following procedure: + get partition id for t.field = const1, return it + get partition id for t.field = const1+1, return it + ... t.field = const1+2, ... + ... ... ... + ... t.field = const2 ... + + IMPLEMENTATION + See get_partitions_in_range_iter for general description of interval + analysis. We support walking over the following intervals: + "t.field IS NULL" + "c1 <=? t.field <=? c2", where c1 and c2 are finite. + Intervals with +inf/-inf, and [NULL, c1] interval can be processed but + that is more tricky and I don't have time to do it right now. 
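+
+    For example (illustrative), for "t.a BETWEEN 10 AND 13" the iterator
+    returns the [sub]partitions for t.a= 10, 11, 12 and 13 in turn.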
+
+    Additionally we have these requirements:
+    * the number of values in the interval must be less than the number of
+      [sub]partitions, and
+    * the number of values in the interval must be less than
+      MAX_RANGE_TO_WALK.
+
+    The rationale behind these requirements is that if they are not met
+    we're likely to hit most of the partitions and traversing the interval
+    will only add overhead. So it's better to return "all partitions used"
+    in that case.
+
+  RETURN
+    0 - No matching partitions, iterator not initialized
+    1 - Some partitions would match, iterator initialized for traversing them
+   -1 - All partitions would match, iterator not initialized
+*/
+
+int get_part_iter_for_interval_via_walking(partition_info *part_info,
+                                           bool is_subpart,
+                                           char *min_value, char *max_value,
+                                           uint flags,
+                                           PARTITION_ITERATOR *part_iter)
+{
+  Field *field;
+  uint total_parts;
+  partition_iter_func get_next_func;
+  if (is_subpart)
+  {
+    field= part_info->subpart_field_array[0];
+    total_parts= part_info->no_subparts;
+    get_next_func= get_next_subpartition_via_walking;
+  }
+  else
+  {
+    field= part_info->part_field_array[0];
+    total_parts= part_info->no_parts;
+    get_next_func= get_next_partition_via_walking;
+  }
+
+  /* Handle the "t.field IS NULL" interval, it is a special case */
+  if (field->real_maybe_null() && !(flags & (NO_MIN_RANGE | NO_MAX_RANGE)) &&
+      *min_value && *max_value)
+  {
+    /*
+      We don't have a part_iter->get_next() function that would find which
+      partition "t.field IS NULL" belongs to, so find the partition that
+      contains NULL right here, and return an iterator over a singleton set.
+    */
+    uint32 part_id;
+    field->set_null();
+    if (is_subpart)
+    {
+      part_id= part_info->get_subpartition_id(part_info);
+      init_single_partition_iterator(part_id, part_iter);
+      return 1; /* Ok, iterator initialized */
+    }
+    else
+    {
+      longlong dummy;
+      if (!part_info->get_partition_id(part_info, &part_id, &dummy))
+      {
+        init_single_partition_iterator(part_id, part_iter);
+        return 1; /* Ok, iterator initialized */
+      }
+    }
+    return 0; /* No partitions match */
+  }
+
+  if (flags & (NO_MIN_RANGE | NO_MAX_RANGE))
+    return -1; /* Can't handle this interval, have to use all partitions */
+
+  /* Get integers for left and right interval bound */
+  longlong a, b;
+  uint len= field->pack_length_in_rec();
+  store_key_image_to_rec(field, min_value, len);
+  a= field->val_int();
+
+  store_key_image_to_rec(field, max_value, len);
+  b= field->val_int();
+
+  a += test(flags & NEAR_MIN);
+  b += test(!(flags & NEAR_MAX));
+  uint n_values= b - a;
+
+  if (n_values > total_parts || n_values > MAX_RANGE_TO_WALK)
+    return -1;
+
+  part_iter->field_vals.start= a;
+  part_iter->field_vals.end= b;
+  part_iter->part_info= part_info;
+  part_iter->get_next= get_next_func;
+  return 1;
+}
+
+
+/*
+  PARTITION_ITERATOR::get_next implementation: enumerate partitions in range
+
+  SYNOPSIS
+    get_next_partition_id_range()
+      part_iter  Partition set iterator structure
+
+  DESCRIPTION
+    This is an implementation of PARTITION_ITERATOR::get_next() that returns
+    [sub]partition ids in the [min_partition_id, max_partition_id] range.
+
+
+/*
+  PARTITION_ITERATOR::get_next implementation: enumerate partitions in range
+
+  SYNOPSIS
+    get_next_partition_id_range()
+      part_iter  Partition set iterator structure
+
+  DESCRIPTION
+    This is the implementation of PARTITION_ITERATOR::get_next() that returns
+    [sub]partition ids in the [min_partition_id, max_partition_id] range.
+
+  RETURN
+    partition id
+    NOT_A_PARTITION_ID if there are no more partitions
+*/
+
+uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter)
+{
+  if (part_iter->part_nums.start == part_iter->part_nums.end)
+    return NOT_A_PARTITION_ID;
+  else
+    return part_iter->part_nums.start++;
+}
+
+
+/*
+  PARTITION_ITERATOR::get_next implementation for LIST partitioning
+
+  SYNOPSIS
+    get_next_partition_id_list()
+      part_iter  Partition set iterator structure
+
+  DESCRIPTION
+    This implementation of PARTITION_ITERATOR::get_next() is special for
+    LIST partitioning: it enumerates the partition ids in
+    part_info->list_array[i] where i runs over the [min_idx, max_idx]
+    interval.
+
+  RETURN
+    partition id
+    NOT_A_PARTITION_ID if there are no more partitions
+*/
+
+uint32 get_next_partition_id_list(PARTITION_ITERATOR *part_iter)
+{
+  if (part_iter->part_nums.start == part_iter->part_nums.end)
+    return NOT_A_PARTITION_ID;
+  else
+    return part_iter->part_info->list_array[part_iter->
+                                            part_nums.start++].partition_id;
+}
+
+
+/*
+  PARTITION_ITERATOR::get_next implementation: walk over field-space interval
+
+  SYNOPSIS
+    get_next_partition_via_walking()
+      part_iter  Partitioning iterator
+
+  DESCRIPTION
+    This implementation of PARTITION_ITERATOR::get_next() returns ids of
+    partitions that contain records with the partitioning field value within
+    the [start_val, end_val] interval.
+
+  RETURN
+    partition id
+    NOT_A_PARTITION_ID if there are no more partitions
+*/
+
+static uint32 get_next_partition_via_walking(PARTITION_ITERATOR *part_iter)
+{
+  uint32 part_id;
+  Field *field= part_iter->part_info->part_field_array[0];
+  while (part_iter->field_vals.start != part_iter->field_vals.end)
+  {
+    field->store(part_iter->field_vals.start, FALSE);
+    part_iter->field_vals.start++;
+    longlong dummy;
+    if (part_iter->part_info->is_sub_partitioned() ?
+        !part_iter->part_info->get_part_partition_id(part_iter->part_info,
+                                                     &part_id, &dummy) :
+        !part_iter->part_info->get_partition_id(part_iter->part_info,
+                                                &part_id, &dummy))
+      return part_id;
+  }
+  return NOT_A_PARTITION_ID;
+}
+
+
+/* Same as get_next_partition_via_walking, but for subpartitions */
+
+static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR *part_iter)
+{
+  uint32 part_id;
+  Field *field= part_iter->part_info->subpart_field_array[0];
+  if (part_iter->field_vals.start == part_iter->field_vals.end)
+    return NOT_A_PARTITION_ID;
+  field->store(part_iter->field_vals.start, FALSE);
+  part_iter->field_vals.start++;
+  return part_iter->part_info->get_subpartition_id(part_iter->part_info);
+}
+#endif
+
diff --git a/sql/sql_partition.h b/sql/sql_partition.h
new file mode 100644
index 00000000000..4982b1fcf1f
--- /dev/null
+++ b/sql/sql_partition.h
@@ -0,0 +1,190 @@
+/* Copyright (C) 2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + +/* Flags for partition handlers */ +#define HA_CAN_PARTITION (1 << 0) /* Partition support */ +#define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) +#define HA_CAN_PARTITION_UNIQUE (1 << 2) +#define HA_USE_AUTO_PARTITION (1 << 3) + +/*typedef struct { + ulonglong data_file_length; + ulonglong max_data_file_length; + ulonglong index_file_length; + ulonglong delete_length; + ha_rows records; + ulong mean_rec_length; + time_t create_time; + time_t check_time; + time_t update_time; + ulonglong check_sum; +} PARTITION_INFO; +*/ +typedef struct { + longlong list_value; + uint32 partition_id; +} LIST_PART_ENTRY; + +typedef struct { + uint32 start_part; + uint32 end_part; +} part_id_range; + +struct st_partition_iter; +#define NOT_A_PARTITION_ID ((uint32)-1) + +bool is_partition_in_list(char *part_name, List<char> list_part_names); +char *are_partitions_in_table(partition_info *new_part_info, + partition_info *old_part_info); +bool check_reorganise_list(partition_info *new_part_info, + partition_info *old_part_info, + List<char> list_part_names); +handler *get_ha_partition(partition_info *part_info); +int get_parts_for_update(const byte *old_data, byte *new_data, + const byte *rec0, partition_info *part_info, + uint32 *old_part_id, uint32 *new_part_id, + longlong *func_value); +int get_part_for_delete(const byte *buf, const byte *rec0, + partition_info *part_info, uint32 *part_id); +void prune_partition_set(const TABLE *table, part_id_range *part_spec); +bool check_partition_info(partition_info *part_info,handlerton **eng_type, + handler *file, ulonglong max_rows); +bool fix_partition_func(THD *thd, const char *name, TABLE *table, + bool create_table_ind); +char *generate_partition_syntax(partition_info *part_info, + uint *buf_length, bool use_sql_alloc, + bool write_all); +bool partition_key_modified(TABLE *table, List<Item> &fields); +void get_partition_set(const TABLE *table, byte *buf, const uint index, + const key_range *key_spec, + part_id_range *part_spec); +void get_full_part_id_from_key(const TABLE *table, byte *buf, + KEY *key_info, + const key_range *key_spec, + part_id_range *part_spec); +bool mysql_unpack_partition(THD *thd, const uchar *part_buf, + uint part_info_len, + uchar *part_state, uint part_state_len, + TABLE *table, bool is_create_table_ind, + handlerton *default_db_type); +void make_used_partitions_str(partition_info *part_info, String *parts_str); +uint32 get_list_array_idx_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); +uint32 get_partition_id_range_for_endpoint(partition_info *part_info, + bool left_endpoint, + bool include_endpoint); + +/* + A "Get next" function for partition iterator. + SYNOPSIS + partition_iter_func() + part_iter Partition iterator, you call only "iter.get_next(&iter)" + + RETURN + NOT_A_PARTITION_ID if there are no more partitions. + [sub]partition_id of the next partition +*/ + +typedef uint32 (*partition_iter_func)(st_partition_iter* part_iter); + + +/* + Partition set iterator. Used to enumerate a set of [sub]partitions + obtained in partition interval analysis (see get_partitions_in_range_iter). 
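Before the definition below, a minimal self-contained model of how a caller consumes this interface (a sketch; iter_t and next_in_range are hypothetical stand-ins for the PARTITION_ITERATOR and get_next_partition_id_range defined in this patch):

#include <cstdio>

typedef unsigned int uint32;
#define NOT_A_PARTITION_ID ((uint32)-1)

struct iter_t
{
  uint32 (*get_next)(iter_t *it);
  uint32 cur, end;                 /* models part_nums.start / .end */
};

/* Same shape as get_next_partition_id_range: yields the half-open
   range [cur, end), then NOT_A_PARTITION_ID forever after. */
static uint32 next_in_range(iter_t *it)
{
  return it->cur == it->end ? NOT_A_PARTITION_ID : it->cur++;
}

int main()
{
  iter_t it= { next_in_range, 1, 4 };   /* partitions 1, 2, 3 */
  for (uint32 id; (id= it.get_next(&it)) != NOT_A_PARTITION_ID; )
    printf("use partition %u\n", id);
  return 0;
}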
+
+  For the user, the only meaningful field is get_next, which may be used as
+  follows:
+             part_iterator.get_next(&part_iterator);
+
+  Initialization is done by any of the following calls:
+    - get_partitions_in_range_iter-type function call
+    - init_single_partition_iterator()
+    - init_all_partitions_iterator()
+  Cleanup is not needed.
+*/
+
+typedef struct st_partition_iter
+{
+  partition_iter_func get_next;
+
+  struct st_part_num_range
+  {
+    uint32 start;
+    uint32 end;
+  };
+
+  struct st_field_value_range
+  {
+    longlong start;
+    longlong end;
+  };
+
+  union
+  {
+    struct st_part_num_range     part_nums;
+    struct st_field_value_range  field_vals;
+  };
+  partition_info *part_info;
+} PARTITION_ITERATOR;
+
+
+/*
+  Get an iterator for the set of partitions that match a given field-space
+  interval
+
+  SYNOPSIS
+    get_partitions_in_range_iter()
+      part_info   Partitioning info
+      is_subpart  TRUE  - act for subpartitioning
+                  FALSE - act for partitioning
+      min_val     Left edge,  field value in opt_range_key format.
+      max_val     Right edge, field value in opt_range_key format.
+      flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
+                  NO_MAX_RANGE.
+      part_iter   Iterator structure to be initialized
+
+  DESCRIPTION
+    Functions with this signature are used to perform "Partitioning Interval
+    Analysis". This analysis is applicable to any type of [sub]partitioning
+    by some function of a single fieldX. The idea is as follows:
+    Given an interval "const1 <=? fieldX <=? const2", find a set of partitions
+    that may contain records with the value of fieldX within the given
+    interval.
+
+    The min_val, max_val and flags parameters specify the interval.
+    The set of partitions is returned by initializing an iterator in
+    *part_iter.
+
+  NOTES
+    There are currently two functions of this type:
+     - get_part_iter_for_interval_via_walking
+     - get_part_iter_for_interval_via_mapping
+
+  RETURN
+    0 - No matching partitions, iterator not initialized
+    1 - Some partitions would match, iterator initialized for traversing them
+   -1 - All partitions would match, iterator not initialized
+*/
+
+typedef int (*get_partitions_in_range_iter)(partition_info *part_info,
+                                            bool is_subpart,
+                                            char *min_val, char *max_val,
+                                            uint flags,
+                                            PARTITION_ITERATOR *part_iter);
+
+#include "partition_info.h"
+
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
new file mode 100644
index 00000000000..ad99d81d7b4
--- /dev/null
+++ b/sql/sql_plugin.cc
@@ -0,0 +1,896 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysql_priv.h" +#include <my_pthread.h> +#define REPORT_TO_LOG 1 +#define REPORT_TO_USER 2 + +char *opt_plugin_dir_ptr; +char opt_plugin_dir[FN_REFLEN]; +LEX_STRING plugin_type_names[]= +{ + { (char *)STRING_WITH_LEN("UDF") }, + { (char *)STRING_WITH_LEN("STORAGE ENGINE") }, + { (char *)STRING_WITH_LEN("FTPARSER") } +}; +static const char *plugin_interface_version_sym= + "_mysql_plugin_interface_version_"; +static const char *sizeof_st_plugin_sym= + "_mysql_sizeof_struct_st_plugin_"; +static const char *plugin_declarations_sym= "_mysql_plugin_declarations_"; +static int min_plugin_interface_version= 0x0000; +/* Note that 'int version' must be the first field of every plugin + sub-structure (plugin->info). +*/ +static int min_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, + 0x0000, + 0x0000 +}; +static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]= +{ + 0x0000, /* UDF: not implemented */ + MYSQL_HANDLERTON_INTERFACE_VERSION, + MYSQL_FTPARSER_INTERFACE_VERSION +}; +static DYNAMIC_ARRAY plugin_dl_array; +static DYNAMIC_ARRAY plugin_array; +static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM]; +static rw_lock_t THR_LOCK_plugin; +static bool initialized= 0; + +static struct st_plugin_dl *plugin_dl_find(LEX_STRING *dl) +{ + uint i; + DBUG_ENTER("plugin_dl_find"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (tmp->ref_count && + ! my_strnncoll(files_charset_info, + (const uchar *)dl->str, dl->length, + (const uchar *)tmp->dl.str, tmp->dl.length)) + DBUG_RETURN(tmp); + } + DBUG_RETURN(0); +} + + +static st_plugin_dl *plugin_dl_insert_or_reuse(struct st_plugin_dl *plugin_dl) +{ + uint i; + DBUG_ENTER("plugin_dl_insert_or_reuse"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (! tmp->ref_count) + { + memcpy(tmp, plugin_dl, sizeof(struct st_plugin_dl)); + DBUG_RETURN(tmp); + } + } + if (insert_dynamic(&plugin_dl_array, (gptr)plugin_dl)) + DBUG_RETURN(0); + DBUG_RETURN(dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1, + struct st_plugin_dl *)); +} + +static inline void free_plugin_mem(struct st_plugin_dl *p) +{ + if (p->handle) + dlclose(p->handle); + my_free(p->dl.str, MYF(MY_ALLOW_ZERO_PTR)); + if (p->version != MYSQL_PLUGIN_INTERFACE_VERSION) + my_free((gptr)p->plugins, MYF(MY_ALLOW_ZERO_PTR)); +} + +static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report) +{ +#ifdef HAVE_DLOPEN + char dlpath[FN_REFLEN]; + uint plugin_dir_len, dummy_errors, dlpathlen; + struct st_plugin_dl *tmp, plugin_dl; + void *sym; + DBUG_ENTER("plugin_dl_add"); + plugin_dir_len= strlen(opt_plugin_dir); + /* + Ensure that the dll doesn't have a path. + This is done to ensure that only approved libraries from the + plugin directory are used (to make this even remotely secure). 
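A stripped-down model of that check (a sketch: ASCII-only and without the NAME_LEN/FN_REFLEN length limits, whereas the server version is charset-aware via my_strchr):

#include <cstdio>
#include <cstring>

/* Accept only a bare file name, so every plugin must live directly in
   opt_plugin_dir; "../../tmp/evil.so" or an absolute path is rejected. */
static bool soname_is_safe(const char *dl)
{
  return strchr(dl, '/') == NULL && strchr(dl, '\\') == NULL;
}

int main()
{
  printf("%d %d\n", soname_is_safe("ha_example.so"),
         soname_is_safe("../../tmp/evil.so"));    /* prints: 1 0 */
  return 0;
}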
+ */ + if (my_strchr(files_charset_info, dl->str, dl->str + dl->length, FN_LIBCHAR) || + dl->length > NAME_LEN || + plugin_dir_len + dl->length + 1 >= FN_REFLEN) + { + if (report & REPORT_TO_USER) + my_error(ER_UDF_NO_PATHS, MYF(0)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_UDF_NO_PATHS)); + DBUG_RETURN(0); + } + /* If this dll is already loaded just increase ref_count. */ + if ((tmp= plugin_dl_find(dl))) + { + tmp->ref_count++; + DBUG_RETURN(tmp); + } + bzero(&plugin_dl, sizeof(plugin_dl)); + /* Compile dll path */ + dlpathlen= + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", dl->str, NullS) - + dlpath; + plugin_dl.ref_count= 1; + /* Open new dll handle */ + if (!(plugin_dl.handle= dlopen(dlpath, RTLD_NOW))) + { + const char *errmsg=dlerror(); + if (!strncmp(dlpath, errmsg, dlpathlen)) + { // if errmsg starts from dlpath, trim this prefix. + errmsg+=dlpathlen; + if (*errmsg == ':') errmsg++; + if (*errmsg == ' ') errmsg++; + } + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, errno, errmsg); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, errmsg); + DBUG_RETURN(0); + } + /* Determine interface version */ + if (!(sym= dlsym(plugin_dl.handle, plugin_interface_version_sym))) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_interface_version_sym); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_interface_version_sym); + DBUG_RETURN(0); + } + plugin_dl.version= *(int *)sym; + /* Versioning */ + if (plugin_dl.version < min_plugin_interface_version || + (plugin_dl.version >> 8) > (MYSQL_PLUGIN_INTERFACE_VERSION >> 8)) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dlpath, 0, + "plugin interface version mismatch"); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, 0, + "plugin interface version mismatch"); + DBUG_RETURN(0); + } + /* Find plugin declarations */ + if (!(sym= dlsym(plugin_dl.handle, plugin_declarations_sym))) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), plugin_declarations_sym); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), plugin_declarations_sym); + DBUG_RETURN(0); + } + + if (plugin_dl.version != MYSQL_PLUGIN_INTERFACE_VERSION) + { + int i, sizeof_st_plugin; + struct st_mysql_plugin *old, *cur; + char *ptr= (char *)sym; + + if ((sym= dlsym(plugin_dl.handle, sizeof_st_plugin_sym))) + sizeof_st_plugin= *(int *)sym; + else + { +#ifdef ERROR_ON_NO_SIZEOF_PLUGIN_SYMBOL + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), sizeof_st_plugin_sym); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), sizeof_st_plugin_sym); + DBUG_RETURN(0); +#else + /* + When the following assert starts failing, we'll have to switch + to the upper branch of the #ifdef + */ + DBUG_ASSERT(min_plugin_interface_version == 0); + sizeof_st_plugin= (int)offsetof(struct st_mysql_plugin, version); +#endif + } + + for (i= 0; + ((struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info; + i++) + /* no op */; + + cur= (struct st_mysql_plugin*) + my_malloc(i*sizeof(struct st_mysql_plugin), MYF(MY_ZEROFILL|MY_WME)); + if (!cur) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), plugin_dl.dl.length); + if (report & 
REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), plugin_dl.dl.length); + DBUG_RETURN(0); + } + /* + All st_plugin fields not initialized in the plugin explicitly, are + set to 0. It matches C standard behaviour for struct initializers that + have less values than the struct definition. + */ + for (i=0; + (old=(struct st_mysql_plugin *)(ptr+i*sizeof_st_plugin))->info; + i++) + memcpy(cur+i, old, min(sizeof(cur[i]), sizeof_st_plugin)); + + sym= cur; + } + plugin_dl.plugins= (struct st_mysql_plugin *)sym; + + /* Duplicate and convert dll name */ + plugin_dl.dl.length= dl->length * files_charset_info->mbmaxlen + 1; + if (! (plugin_dl.dl.str= my_malloc(plugin_dl.dl.length, MYF(0)))) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), plugin_dl.dl.length); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), plugin_dl.dl.length); + DBUG_RETURN(0); + } + plugin_dl.dl.length= copy_and_convert(plugin_dl.dl.str, plugin_dl.dl.length, + files_charset_info, dl->str, dl->length, system_charset_info, + &dummy_errors); + plugin_dl.dl.str[plugin_dl.dl.length]= 0; + /* Add this dll to array */ + if (! (tmp= plugin_dl_insert_or_reuse(&plugin_dl))) + { + free_plugin_mem(&plugin_dl); + if (report & REPORT_TO_USER) + my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_dl)); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_dl)); + DBUG_RETURN(0); + } + DBUG_RETURN(tmp); +#else + DBUG_ENTER("plugin_dl_add"); + if (report & REPORT_TO_USER) + my_error(ER_FEATURE_DISABLED, MYF(0), "plugin", "HAVE_DLOPEN"); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_FEATURE_DISABLED), "plugin", "HAVE_DLOPEN"); + DBUG_RETURN(0); +#endif +} + + +static void plugin_dl_del(LEX_STRING *dl) +{ +#ifdef HAVE_DLOPEN + uint i; + DBUG_ENTER("plugin_dl_del"); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + if (tmp->ref_count && + ! my_strnncoll(files_charset_info, + (const uchar *)dl->str, dl->length, + (const uchar *)tmp->dl.str, tmp->dl.length)) + { + /* Do not remove this element, unless no other plugin uses this dll. */ + if (! --tmp->ref_count) + { + free_plugin_mem(tmp); + bzero(tmp, sizeof(struct st_plugin_dl)); + } + break; + } + } + DBUG_VOID_RETURN; +#endif +} + + +static struct st_plugin_int *plugin_find_internal(LEX_STRING *name, int type) +{ + uint i; + DBUG_ENTER("plugin_find_internal"); + if (! 
initialized) + DBUG_RETURN(0); + if (type == MYSQL_ANY_PLUGIN) + { + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + struct st_plugin_int *plugin= (st_plugin_int *) + hash_search(&plugin_hash[i], (const byte *)name->str, name->length); + if (plugin) + DBUG_RETURN(plugin); + } + } + else + DBUG_RETURN((st_plugin_int *) + hash_search(&plugin_hash[type], (const byte *)name->str, name->length)); + DBUG_RETURN(0); +} + + +my_bool plugin_is_ready(LEX_STRING *name, int type) +{ + my_bool rc= FALSE; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_is_ready"); + rw_rdlock(&THR_LOCK_plugin); + if ((plugin= plugin_find_internal(name, type)) && + plugin->state == PLUGIN_IS_READY) + rc= TRUE; + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(rc); +} + + +struct st_plugin_int *plugin_lock(LEX_STRING *name, int type) +{ + struct st_plugin_int *rc; + DBUG_ENTER("plugin_lock"); + rw_wrlock(&THR_LOCK_plugin); + if ((rc= plugin_find_internal(name, type))) + { + if (rc->state == PLUGIN_IS_READY || rc->state == PLUGIN_IS_UNINITIALIZED) + rc->ref_count++; + else + rc= 0; + } + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(rc); +} + + +static st_plugin_int *plugin_insert_or_reuse(struct st_plugin_int *plugin) +{ + uint i; + DBUG_ENTER("plugin_insert_or_reuse"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_FREED) + { + memcpy(tmp, plugin, sizeof(struct st_plugin_int)); + DBUG_RETURN(tmp); + } + } + if (insert_dynamic(&plugin_array, (gptr)plugin)) + DBUG_RETURN(0); + DBUG_RETURN(dynamic_element(&plugin_array, plugin_array.elements - 1, + struct st_plugin_int *)); +} + +static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report) +{ + struct st_plugin_int tmp; + struct st_mysql_plugin *plugin; + DBUG_ENTER("plugin_add"); + if (plugin_find_internal(name, MYSQL_ANY_PLUGIN)) + { + if (report & REPORT_TO_USER) + my_error(ER_UDF_EXISTS, MYF(0), name->str); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_UDF_EXISTS), name->str); + DBUG_RETURN(TRUE); + } + if (! (tmp.plugin_dl= plugin_dl_add(dl, report))) + DBUG_RETURN(TRUE); + /* Find plugin by name */ + for (plugin= tmp.plugin_dl->plugins; plugin->info; plugin++) + { + uint name_len= strlen(plugin->name); + if (plugin->type >= 0 && plugin->type < MYSQL_MAX_PLUGIN_TYPE_NUM && + ! my_strnncoll(system_charset_info, + (const uchar *)name->str, name->length, + (const uchar *)plugin->name, + name_len)) + { + struct st_plugin_int *tmp_plugin_ptr; + if (*(int*)plugin->info < + min_plugin_info_interface_version[plugin->type] || + ((*(int*)plugin->info) >> 8) > + (cur_plugin_info_interface_version[plugin->type] >> 8)) + { + char buf[256]; + strxnmov(buf, sizeof(buf) - 1, "API version for ", + plugin_type_names[plugin->type].str, + " plugin is too different", NullS); + if (report & REPORT_TO_USER) + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dl->str, 0, buf); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dl->str, 0, buf); + goto err; + } + tmp.plugin= plugin; + tmp.name.str= (char *)plugin->name; + tmp.name.length= name_len; + tmp.ref_count= 0; + tmp.state= PLUGIN_IS_UNINITIALIZED; + if (plugin->status_vars) + { + SHOW_VAR array[2]= { + {plugin->name, (char*)plugin->status_vars, SHOW_ARRAY}, + {0, 0, SHOW_UNDEF} + }; + if (add_status_vars(array)) // add_status_vars makes a copy + goto err; + } + if (! 
(tmp_plugin_ptr= plugin_insert_or_reuse(&tmp))) + goto err; + if (my_hash_insert(&plugin_hash[plugin->type], (byte*)tmp_plugin_ptr)) + { + tmp_plugin_ptr->state= PLUGIN_IS_FREED; + goto err; + } + DBUG_RETURN(FALSE); + } + } + if (report & REPORT_TO_USER) + my_error(ER_CANT_FIND_DL_ENTRY, MYF(0), name->str); + if (report & REPORT_TO_LOG) + sql_print_error(ER(ER_CANT_FIND_DL_ENTRY), name->str); +err: + if (plugin->status_vars) + { + SHOW_VAR array[2]= { + {plugin->name, (char*)plugin->status_vars, SHOW_ARRAY}, + {0, 0, SHOW_UNDEF} + }; + remove_status_vars(array); + } + plugin_dl_del(dl); + DBUG_RETURN(TRUE); +} + + +static void plugin_del(LEX_STRING *name) +{ + uint i; + struct st_plugin_int *plugin; + DBUG_ENTER("plugin_del"); + if ((plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN))) + { + if (plugin->plugin->status_vars) + { + SHOW_VAR array[2]= { + {plugin->plugin->name, (char*)plugin->plugin->status_vars, SHOW_ARRAY}, + {0, 0, SHOW_UNDEF} + }; + remove_status_vars(array); + } + hash_delete(&plugin_hash[plugin->plugin->type], (byte*)plugin); + plugin_dl_del(&plugin->plugin_dl->dl); + plugin->state= PLUGIN_IS_FREED; + } + DBUG_VOID_RETURN; +} + + +void plugin_unlock(struct st_plugin_int *plugin) +{ + DBUG_ENTER("plugin_unlock"); + rw_wrlock(&THR_LOCK_plugin); + DBUG_ASSERT(plugin && plugin->ref_count); + plugin->ref_count--; + if (plugin->state == PLUGIN_IS_DELETED && ! plugin->ref_count) + { + if (plugin->plugin->deinit) + plugin->plugin->deinit(); + plugin_del(&plugin->name); + } + rw_unlock(&THR_LOCK_plugin); + DBUG_VOID_RETURN; +} + + +static int plugin_initialize(struct st_plugin_int *plugin) +{ + DBUG_ENTER("plugin_initialize"); + + if (plugin->plugin->init) + { + if (plugin->plugin->init()) + { + sql_print_error("Plugin '%s' init function returned error.", + plugin->name.str); + DBUG_PRINT("warning", ("Plugin '%s' init function returned error.", + plugin->name.str)); + goto err; + } + } + + switch (plugin->plugin->type) + { + case MYSQL_STORAGE_ENGINE_PLUGIN: + sql_print_error("Storage Engine plugins are unsupported in this version."); + goto err; + default: + break; + } + + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + +static void plugin_call_initializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_initializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_UNINITIALIZED) + { + if (plugin_initialize(tmp)) + plugin_del(&tmp->name); + else + tmp->state= PLUGIN_IS_READY; + } + } + DBUG_VOID_RETURN; +} + + +static void plugin_call_deinitializer(void) +{ + uint i; + DBUG_ENTER("plugin_call_deinitializer"); + for (i= 0; i < plugin_array.elements; i++) + { + struct st_plugin_int *tmp= dynamic_element(&plugin_array, i, + struct st_plugin_int *); + if (tmp->state == PLUGIN_IS_READY) + { + if (tmp->plugin->deinit) + { + DBUG_PRINT("info", ("Deinitializing plugin: '%s'", tmp->name.str)); + if (tmp->plugin->deinit()) + { + DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.", + tmp->name.str)); + } + } + tmp->state= PLUGIN_IS_UNINITIALIZED; + } + } + DBUG_VOID_RETURN; +} + + +static byte *get_hash_key(const byte *buff, uint *length, + my_bool not_used __attribute__((unused))) +{ + struct st_plugin_int *plugin= (st_plugin_int *)buff; + *length= (uint)plugin->name.length; + return((byte *)plugin->name.str); +} + + +int plugin_init(void) +{ + int i; + DBUG_ENTER("plugin_init"); + + if (initialized) + DBUG_RETURN(0); + + my_rwlock_init(&THR_LOCK_plugin, 
NULL); + + if (my_init_dynamic_array(&plugin_dl_array, + sizeof(struct st_plugin_dl),16,16) || + my_init_dynamic_array(&plugin_array, + sizeof(struct st_plugin_int),16,16)) + goto err; + + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + { + if (hash_init(&plugin_hash[i], system_charset_info, 16, 0, 0, + get_hash_key, NULL, 0)) + goto err; + } + + initialized= 1; + + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + +my_bool plugin_register_builtin(struct st_mysql_plugin *plugin) +{ + struct st_plugin_int tmp; + DBUG_ENTER("plugin_register_builtin"); + + tmp.plugin= plugin; + tmp.name.str= (char *)plugin->name; + tmp.name.length= strlen(plugin->name); + tmp.state= PLUGIN_IS_UNINITIALIZED; + + /* Cannot be unloaded */ + tmp.ref_count= 1; + tmp.plugin_dl= 0; + + if (insert_dynamic(&plugin_array, (gptr)&tmp)) + DBUG_RETURN(1); + + if (my_hash_insert(&plugin_hash[plugin->type], + (byte*)dynamic_element(&plugin_array, + plugin_array.elements - 1, + struct st_plugin_int *))) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +void plugin_load(void) +{ + TABLE_LIST tables; + TABLE *table; + READ_RECORD read_record_info; + int error, i; + MEM_ROOT mem; + THD *new_thd; + DBUG_ENTER("plugin_load"); + + DBUG_ASSERT(initialized); + + if (!(new_thd= new THD)) + { + sql_print_error("Can't allocate memory for plugin structures"); + delete new_thd; + DBUG_VOID_RETURN; + } + init_sql_alloc(&mem, 1024, 0); + new_thd->thread_stack= (char*) &tables; + new_thd->store_globals(); + new_thd->db= my_strdup("mysql", MYF(0)); + new_thd->db_length= 5; + bzero((gptr)&tables, sizeof(tables)); + tables.alias= tables.table_name= (char*)"plugin"; + tables.lock_type= TL_READ; + tables.db= new_thd->db; + if (simple_open_n_lock_tables(new_thd, &tables)) + { + DBUG_PRINT("error",("Can't open plugin table")); + sql_print_error("Can't open the mysql.plugin table. 
Please run the mysql_upgrade script to create it."); + goto end; + } + table= tables.table; + init_read_record(&read_record_info, new_thd, table, NULL, 1, 0); + while (!(error= read_record_info.read_record(&read_record_info))) + { + DBUG_PRINT("info", ("init plugin record")); + LEX_STRING name, dl; + name.str= get_field(&mem, table->field[0]); + name.length= strlen(name.str); + dl.str= get_field(&mem, table->field[1]); + dl.length= strlen(dl.str); + if (plugin_add(&name, &dl, REPORT_TO_LOG)) + DBUG_PRINT("warning", ("Couldn't load plugin named '%s' with soname '%s'.", + name.str, dl.str)); + } + plugin_call_initializer(); + if (error > 0) + sql_print_error(ER(ER_GET_ERRNO), my_errno); + end_read_record(&read_record_info); + new_thd->version--; // Force close to free memory +end: + free_root(&mem, MYF(0)); + close_thread_tables(new_thd); + delete new_thd; + /* Remember that we don't have a THD */ + my_pthread_setspecific_ptr(THR_THD, 0); + DBUG_VOID_RETURN; +} + + +void plugin_free(void) +{ + uint i; + DBUG_ENTER("plugin_free"); + plugin_call_deinitializer(); + for (i= 0; i < MYSQL_MAX_PLUGIN_TYPE_NUM; i++) + hash_free(&plugin_hash[i]); + delete_dynamic(&plugin_array); + for (i= 0; i < plugin_dl_array.elements; i++) + { + struct st_plugin_dl *tmp= dynamic_element(&plugin_dl_array, i, + struct st_plugin_dl *); + free_plugin_mem(tmp); + } + delete_dynamic(&plugin_dl_array); + if (initialized) + { + initialized= 0; + rwlock_destroy(&THR_LOCK_plugin); + } + DBUG_VOID_RETURN; +} + + +my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl) +{ + TABLE_LIST tables; + TABLE *table; + int error; + struct st_plugin_int *tmp; + DBUG_ENTER("mysql_install_plugin"); + + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + if (check_table_access(thd, INSERT_ACL, &tables, 0)) + DBUG_RETURN(TRUE); + + /* need to open before acquiring THR_LOCK_plugin or it will deadlock */ + if (! (table = open_ltable(thd, &tables, TL_WRITE))) + DBUG_RETURN(TRUE); + + rw_wrlock(&THR_LOCK_plugin); + if (plugin_add(name, dl, REPORT_TO_USER)) + goto err; + tmp= plugin_find_internal(name, MYSQL_ANY_PLUGIN); + + if (plugin_initialize(tmp)) + { + my_error(ER_CANT_INITIALIZE_UDF, MYF(0), name->str, + "Plugin initialization function failed."); + goto err; + } + + tmp->state= PLUGIN_IS_READY; + + restore_record(table, s->default_values); + table->field[0]->store(name->str, name->length, system_charset_info); + table->field[1]->store(dl->str, dl->length, files_charset_info); + error= table->file->ha_write_row(table->record[0]); + if (error) + { + table->file->print_error(error, MYF(0)); + goto deinit; + } + + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(FALSE); +deinit: + if (tmp->plugin->deinit) + tmp->plugin->deinit(); +err: + plugin_del(name); + rw_unlock(&THR_LOCK_plugin); + DBUG_RETURN(TRUE); +} + + +my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name) +{ + TABLE *table; + TABLE_LIST tables; + struct st_plugin_int *plugin; + DBUG_ENTER("mysql_uninstall_plugin"); + + bzero(&tables, sizeof(tables)); + tables.db= (char *)"mysql"; + tables.table_name= tables.alias= (char *)"plugin"; + + /* need to open before acquiring THR_LOCK_plugin or it will deadlock */ + if (! 
(table= open_ltable(thd, &tables, TL_WRITE)))
+    DBUG_RETURN(TRUE);
+
+  rw_wrlock(&THR_LOCK_plugin);
+  if (!(plugin= plugin_find_internal(name, MYSQL_ANY_PLUGIN)))
+  {
+    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PLUGIN", name->str);
+    goto err;
+  }
+  if (!plugin->plugin_dl)
+  {
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0,
+                 "Built-in plugins cannot be deleted.");
+    my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "PLUGIN", name->str);
+    goto err;
+  }
+
+  if (plugin->ref_count)
+  {
+    plugin->state= PLUGIN_IS_DELETED;
+    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, 0,
+                 "Plugin is not deleted, waiting on tables.");
+  }
+  else
+  {
+    if (plugin->plugin->deinit)
+      plugin->plugin->deinit();
+    plugin_del(name);
+  }
+
+  table->field[0]->store(name->str, name->length, system_charset_info);
+  table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
+  if (! table->file->index_read_idx(table->record[0], 0,
+                                    (byte *)table->field[0]->ptr,
+                                    table->key_info[0].key_length,
+                                    HA_READ_KEY_EXACT))
+  {
+    int error;
+    if ((error= table->file->ha_delete_row(table->record[0])))
+    {
+      table->file->print_error(error, MYF(0));
+      goto err;
+    }
+  }
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(FALSE);
+err:
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(TRUE);
+}
+
+
+my_bool plugin_foreach(THD *thd, plugin_foreach_func *func,
+                       int type, void *arg)
+{
+  uint idx;
+  struct st_plugin_int *plugin;
+  DBUG_ENTER("plugin_foreach");
+  rw_rdlock(&THR_LOCK_plugin);
+
+  if (type == MYSQL_ANY_PLUGIN)
+  {
+    for (idx= 0; idx < plugin_array.elements; idx++)
+    {
+      plugin= dynamic_element(&plugin_array, idx, struct st_plugin_int *);
+
+      /* FREED records may have garbage pointers */
+      if ((plugin->state != PLUGIN_IS_FREED) &&
+          func(thd, plugin, arg))
+        goto err;
+    }
+  }
+  else
+  {
+    HASH *hash= &plugin_hash[type];
+    for (idx= 0; idx < hash->records; idx++)
+    {
+      plugin= (struct st_plugin_int *) hash_element(hash, idx);
+      if ((plugin->state != PLUGIN_IS_FREED) &&
+          (plugin->state != PLUGIN_IS_DELETED) &&
+          func(thd, plugin, arg))
+        goto err;
+    }
+  }
+
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(FALSE);
+err:
+  rw_unlock(&THR_LOCK_plugin);
+  DBUG_RETURN(TRUE);
+}
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
new file mode 100644
index 00000000000..672db105cd1
--- /dev/null
+++ b/sql/sql_plugin.h
@@ -0,0 +1,83 @@
+/* Copyright (C) 2005 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _sql_plugin_h +#define _sql_plugin_h + +/* + the following #define adds server-only members to enum_mysql_show_type, + that is defined in plugin.h +*/ +#define SHOW_FUNC SHOW_FUNC, SHOW_KEY_CACHE_LONG, SHOW_KEY_CACHE_LONGLONG, \ + SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, SHOW_HAVE, \ + SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, SHOW_LONG_NOFLUSH +#include <mysql/plugin.h> +#undef SHOW_FUNC +typedef enum enum_mysql_show_type SHOW_TYPE; +typedef struct st_mysql_show_var SHOW_VAR; + +#define MYSQL_ANY_PLUGIN -1 + +enum enum_plugin_state +{ + PLUGIN_IS_FREED= 0, + PLUGIN_IS_DELETED, + PLUGIN_IS_UNINITIALIZED, + PLUGIN_IS_READY +}; + +/* A handle for the dynamic library containing a plugin or plugins. */ + +struct st_plugin_dl +{ + LEX_STRING dl; + void *handle; + struct st_mysql_plugin *plugins; + int version; + uint ref_count; /* number of plugins loaded from the library */ +}; + +/* A handle of a plugin */ + +struct st_plugin_int +{ + LEX_STRING name; + struct st_mysql_plugin *plugin; + struct st_plugin_dl *plugin_dl; + enum enum_plugin_state state; + uint ref_count; /* number of threads using the plugin */ +}; + +extern char *opt_plugin_dir_ptr; +extern char opt_plugin_dir[FN_REFLEN]; +extern LEX_STRING plugin_type_names[]; +extern int plugin_init(void); +extern void plugin_load(void); +extern void plugin_free(void); +extern my_bool plugin_is_ready(LEX_STRING *name, int type); +extern st_plugin_int *plugin_lock(LEX_STRING *name, int type); +extern void plugin_unlock(struct st_plugin_int *plugin); +extern my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl); +extern my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name); + +extern my_bool plugin_register_builtin(struct st_mysql_plugin *plugin); + +typedef my_bool (plugin_foreach_func)(THD *thd, + st_plugin_int *plugin, + void *arg); +extern my_bool plugin_foreach(THD *thd, plugin_foreach_func *func, + int type, void *arg); +#endif diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 332f699200e..a66b2bdb9b6 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1733,7 +1733,9 @@ static bool check_prepared_statement(Prepared_statement *stmt, case SQLCOM_SHOW_COLUMN_TYPES: case SQLCOM_SHOW_STATUS: case SQLCOM_SHOW_VARIABLES: - case SQLCOM_SHOW_LOGS: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: case SQLCOM_SHOW_TABLES: case SQLCOM_SHOW_OPEN_TABLES: case SQLCOM_SHOW_CHARSETS: @@ -1871,7 +1873,7 @@ void mysql_stmt_prepare(THD *thd, const char *packet, uint packet_length) thd->stmt_map.erase(stmt); } else - mysql_log.write(thd, COM_STMT_PREPARE, "[%lu] %s", stmt->id, packet); + general_log_print(thd, COM_STMT_PREPARE, "[%lu] %s", stmt->id, packet); /* check_prepared_statemnt sends the metadata packet in case of success */ DBUG_VOID_RETURN; @@ -2240,7 +2242,7 @@ void mysql_stmt_execute(THD *thd, char *packet_arg, uint packet_length) if (!(specialflag & SPECIAL_NO_PRIOR)) my_pthread_setprio(pthread_self(), WAIT_PRIOR); if (error == 0) - mysql_log.write(thd, COM_STMT_EXECUTE, "[%lu] %s", stmt->id, thd->query); + general_log_print(thd, COM_STMT_EXECUTE, "[%lu] %s", stmt->id, thd->query); DBUG_VOID_RETURN; @@ -2619,7 +2621,7 @@ void Prepared_statement::setup_set_params() { /* Setup binary logging */ if (mysql_bin_log.is_open() && 
is_update_query(lex->sql_command) || - mysql_log.is_open() || mysql_slow_log.is_open()) + opt_log || opt_slow_log) { set_params_from_vars= insert_params_from_vars_with_log; #ifndef EMBEDDED_LIBRARY @@ -2754,11 +2756,11 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) old_stmt_arena= thd->stmt_arena; thd->stmt_arena= this; lex_start(thd, (uchar*) thd->query, thd->query_length); - lex->safe_to_cache_query= FALSE; lex->stmt_prepare_mode= TRUE; error= yyparse((void *)thd) || thd->is_fatal_error || thd->net.report_error || init_param_array(this); + lex->safe_to_cache_query= FALSE; /* While doing context analysis of the query (in check_prepared_statement) we allocate a lot of additional memory: for open tables, JOINs, derived diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 74951029de9..e3468b2b5cf 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -32,7 +32,7 @@ static TABLE_LIST *reverse_table_list(TABLE_LIST *table_list); second entry is the new name. */ -bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list) +bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent) { bool error= 1; TABLE_LIST *ren_table= 0; @@ -80,13 +80,13 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list) } /* Lets hope this doesn't fail as the result will be messy */ - if (!error) + if (!silent && !error) { if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); - mysql_bin_log.write(&qinfo); + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, FALSE); } send_ok(thd); } @@ -135,7 +135,7 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error) { TABLE_LIST *ren_table,*new_table; frm_type_enum frm_type; - db_type table_type; + enum legacy_db_type table_type; DBUG_ENTER("rename_tables"); @@ -156,18 +156,15 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error) old_alias= ren_table->table_name; new_alias= new_table->table_name; } - sprintf(name,"%s/%s/%s%s",mysql_data_home, - new_table->db, new_alias, reg_ext); - unpack_filename(name, name); + build_table_filename(name, sizeof(name), + new_table->db, new_alias, reg_ext); if (!access(name,F_OK)) { my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias); DBUG_RETURN(ren_table); // This can't be skipped } - sprintf(name,"%s/%s/%s%s",mysql_data_home, - ren_table->db, old_alias, - reg_ext); - unpack_filename(name, name); + build_table_filename(name, sizeof(name), + ren_table->db, old_alias, reg_ext); frm_type= mysql_frm_type(thd, name, &table_type); switch (frm_type) @@ -178,7 +175,9 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error) my_error(ER_FILE_NOT_FOUND, MYF(0), name, my_errno); else { - if (!(rc= mysql_rename_table(table_type, ren_table->db, old_alias, + if (!(rc= mysql_rename_table(ha_resolve_by_legacy_type(thd, + table_type), + ren_table->db, old_alias, new_table->db, new_alias))) { if ((rc= Table_triggers_list::change_table_name(thd, ren_table->db, @@ -192,7 +191,9 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error) triggers appropriately. So let us revert operations on .frm and handler's data and report about failure to rename table. 
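The same revert idea in miniature (a sketch with hypothetical helpers standing in for mysql_rename_table() and Table_triggers_list::change_table_name(); both return 0 on success):

#include <cstdio>

static int rename_frm_and_data(const char *from, const char *to)
{ printf("rename %s -> %s\n", from, to); return 0; }

static int rename_triggers(const char *from, const char *to)
{ (void) from; (void) to; return 1; }    /* simulate a failure */

static int rename_with_revert(const char *old_name, const char *new_name)
{
  if (rename_frm_and_data(old_name, new_name))
    return 1;                            /* step 1 failed: nothing to undo */
  if (rename_triggers(old_name, new_name))
  {
    /* Step 2 failed: undo step 1 so the .frm/handler data and the trigger
       definitions never disagree about the table name. */
    (void) rename_frm_and_data(new_name, old_name);
    return 1;
  }
  return 0;
}

int main()
{
  int rc= rename_with_revert("t1", "t2");  /* renames, fails, reverts */
  printf("result: %d\n", rc);
  return 0;
}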
*/ - (void) mysql_rename_table(table_type, new_table->db, new_alias, + (void) mysql_rename_table(ha_resolve_by_legacy_type(thd, + table_type), + new_table->db, new_alias, ren_table->db, old_alias); } } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 07678d97800..743a0a6b565 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -19,6 +19,7 @@ #include "sql_repl.h" #include "log_event.h" +#include "rpl_filter.h" #include <my_dir.h> int max_binlog_dump_events = 0; // unlimited @@ -971,6 +972,9 @@ int reset_slave(THD *thd, MASTER_INFO* mi) error=1; goto err; } + + ha_reset_slave(thd); + // delete relay logs, clear relay log coordinates if ((error= purge_relay_logs(&mi->rli, thd, 1 /* just reset */, @@ -1321,6 +1325,13 @@ bool mysql_show_binlog_events(THD* thd) Format_description_log_event *description_event= new Format_description_log_event(3); /* MySQL 4.0 by default */ + /* + Wait for handlers to insert any pending information + into the binlog. For e.g. ndb which updates the binlog asynchronously + this is needed so that the uses sees all its own commands in the binlog + */ + ha_binlog_wait(thd); + if (mysql_bin_log.is_open()) { LEX_MASTER_INFO *lex_mi= &thd->lex->mi; @@ -1463,8 +1474,8 @@ bool show_binlog_info(THD* thd) int dir_len = dirname_length(li.log_file_name); protocol->store(li.log_file_name + dir_len, &my_charset_bin); protocol->store((ulonglong) li.pos); - protocol->store(&binlog_do_db); - protocol->store(&binlog_ignore_db); + protocol->store(binlog_filter->get_do_db()); + protocol->store(binlog_filter->get_ignore_db()); if (protocol->write()) DBUG_RETURN(TRUE); } @@ -1565,6 +1576,8 @@ int log_loaded_block(IO_CACHE* file) if (!(block_len = (char*) file->read_end - (char*) buffer)) return 0; lf_info = (LOAD_FILE_INFO*) file->arg; + if (lf_info->thd->current_stmt_binlog_row_based) + return 0; if (lf_info->last_pos_in_file != HA_POS_ERROR && lf_info->last_pos_in_file >= file->pos_in_file) return 0; diff --git a/sql/sql_repl.h b/sql/sql_repl.h index 9eb6456ee20..789de64da85 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -14,6 +14,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include "rpl_filter.h" + #ifdef HAVE_REPLICATION #include "slave.h" @@ -31,7 +33,6 @@ typedef struct st_slave_info extern my_bool opt_show_slave_auth_info; extern char *master_host, *master_info_file; extern bool server_id_supplied; -extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern int max_binlog_dump_events; extern my_bool opt_sporadic_binlog_dump_fail; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 768ae7bf71f..7695001cd67 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -649,6 +649,26 @@ JOIN::optimize() } } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + TABLE_LIST *tbl; + for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf) + { + /* + If tbl->embedding!=NULL that means that this table is in the inner + part of the nested outer join, and we can't do partition pruning + (TODO: check if this limitation can be lifted) + */ + if (!tbl->embedding) + { + Item *prune_cond= tbl->on_expr? tbl->on_expr : conds; + tbl->table->no_partitions_used= prune_partitions(thd, tbl->table, + prune_cond); + } + } + } +#endif + /* Optimize count(*), min() and max() */ if (tables_list && tmp_table_param.sum_func_count && ! 
group_list) { @@ -977,23 +997,19 @@ JOIN::optimize() } /* - Need to tell Innobase that to play it safe, it should fetch all - columns of the tables: this is because MySQL may build row - pointers for the rows, and for all columns of the primary key the - field->query_id has not necessarily been set to thd->query_id by - MySQL. + Need to tell handlers that to play it safe, it should fetch all + columns of the primary key of the tables: this is because MySQL may + build row pointers for the rows, and for all columns of the primary key + the read set has not necessarily been set by the server code. */ - -#ifdef HAVE_INNOBASE_DB if (need_tmp || select_distinct || group_list || order) { for (uint i_h = const_tables; i_h < tables; i_h++) { TABLE* table_h = join_tab[i_h].table; - table_h->file->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY); + table_h->file->ha_retrieve_all_pk(); } } -#endif DBUG_EXECUTE("info",TEST_join(this);); @@ -1355,6 +1371,9 @@ JOIN::exec() /* Copy data to the temporary table */ thd->proc_info= "Copying to tmp table"; DBUG_PRINT("info", ("%s", thd->proc_info)); + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) { error= tmp_error; @@ -1510,6 +1529,9 @@ JOIN::exec() 1, TRUE)) DBUG_VOID_RETURN; curr_join->group_list= 0; + if (!curr_join->sort_and_group && + curr_join->const_tables != curr_join->tables) + curr_join->join_tab[curr_join->const_tables].sorted= 0; if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0))) @@ -1696,6 +1718,16 @@ JOIN::exec() (select_options & OPTION_FOUND_ROWS ? HA_POS_ERROR : unit->select_limit_cnt))) DBUG_VOID_RETURN; + if (curr_join->const_tables != curr_join->tables && + !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache) + { + /* + If no IO cache exists for the first table then we are using an + INDEX SCAN and no filesort. Thus we should not remove the sorted + attribute on the INDEX SCAN. + */ + skip_sort_order= 1; + } } } /* XXX: When can we have here thd->net.report_error not zero? */ @@ -2022,7 +2054,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((!table->file->records || table->no_partitions_used) && !embedding) +#else if (!table->file->records && !embedding) +#endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. set_position(join,const_count++,s,(KEYUSE*) 0); @@ -2049,8 +2085,14 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables, COND *conds, while (embedding); continue; } - - if ((table->s->system || table->file->records <= 1) && ! 
s->dependent && +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool no_partitions_used= table->no_partitions_used; +#else + const bool no_partitions_used= FALSE; +#endif + if ((table->s->system || table->file->records <= 1 || + no_partitions_used) && + !s->dependent && !(table->file->table_flags() & HA_NOT_EXACT_COUNT) && !table->fulltext_searched) { @@ -5172,7 +5214,7 @@ static void add_not_null_conds(JOIN *join) SYNOPSIS add_found_match_trig_cond() tab the first inner table for most nested outer join - cond the predicate to be guarded + cond the predicate to be guarded (must be set) root_tab the first inner table to stop DESCRIPTION @@ -5190,12 +5232,11 @@ static COND* add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab) { COND *tmp; - if (tab == root_tab || !cond) + DBUG_ASSERT(cond != 0); + if (tab == root_tab) return cond; if ((tmp= add_found_match_trig_cond(tab->first_upper, cond, root_tab))) - { tmp= new Item_func_trig_cond(tmp, &tab->found); - } if (tmp) { tmp->quick_fix_field(); @@ -5356,6 +5397,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) for (uint i=join->const_tables ; i < join->tables ; i++) { JOIN_TAB *tab=join->join_tab+i; + /* + first_inner is the X in queries like: + SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X + */ JOIN_TAB *first_inner_tab= tab->first_inner; table_map current_map= tab->table->map; bool use_quick_range=0; @@ -5406,15 +5451,15 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ DBUG_PRINT("info", ("Item_int")); tmp= new Item_int((longlong) 1,1); // Always true - DBUG_PRINT("info", ("Item_int 0x%lx", (ulong)tmp)); } } if (tmp || !cond) { DBUG_EXECUTE("where",print_where(tmp,tab->table->alias);); - SQL_SELECT *sel=tab->select=(SQL_SELECT*) - thd->memdup((gptr) select, sizeof(SQL_SELECT)); + SQL_SELECT *sel= tab->select= ((SQL_SELECT*) + thd->memdup((gptr) select, + sizeof(*select))); if (!sel) DBUG_RETURN(1); // End of memory /* @@ -5641,6 +5686,7 @@ make_join_readinfo(JOIN *join, uint options) uint i; bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); + bool sorted= 1; DBUG_ENTER("make_join_readinfo"); for (i=join->const_tables ; i < join->tables ; i++) @@ -5650,6 +5696,8 @@ make_join_readinfo(JOIN *join, uint options) tab->read_record.table= table; tab->read_record.file=table->file; tab->next_select=sub_select; /* normal select */ + tab->sorted= sorted; + sorted= 0; // only first must be sorted switch (tab->type) { case JT_SYSTEM: // Only happens with left join table->status=STATUS_NO_RECORD; @@ -8116,7 +8164,7 @@ const_expression_in_where(COND *cond, Item *comp_item, Item **const_item) new_created field */ -Field* create_tmp_field_from_field(THD *thd, Field* org_field, +Field *create_tmp_field_from_field(THD *thd, Field *org_field, const char *name, TABLE *table, Item_field *item, uint convert_blob_length) { @@ -8125,12 +8173,14 @@ Field* create_tmp_field_from_field(THD *thd, Field* org_field, if (convert_blob_length && (org_field->flags & BLOB_FLAG)) new_field= new Field_varstring(convert_blob_length, org_field->maybe_null(), - org_field->field_name, table, + org_field->field_name, table->s, org_field->charset()); else new_field= org_field->new_field(thd->mem_root, table); if (new_field) { + new_field->init(table); + new_field->orig_table= org_field->orig_table; if (item) item->result_field= new_field; else @@ -8173,18 +8223,18 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table, Item ***copy_func, bool modify_item, uint convert_blob_length) { - bool 
maybe_null=item->maybe_null; + bool maybe_null= item->maybe_null; Field *new_field; LINT_INIT(new_field); switch (item->result_type()) { case REAL_RESULT: - new_field=new Field_double(item->max_length, maybe_null, - item->name, table, item->decimals); + new_field= new Field_double(item->max_length, maybe_null, + item->name, item->decimals); break; case INT_RESULT: - new_field=new Field_longlong(item->max_length, maybe_null, - item->name, table, item->unsigned_flag); + new_field= new Field_longlong(item->max_length, maybe_null, + item->name, item->unsigned_flag); break; case STRING_RESULT: DBUG_ASSERT(item->collation.collation); @@ -8196,26 +8246,29 @@ static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table, */ if ((type= item->field_type()) == MYSQL_TYPE_DATETIME || type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE) - new_field= item->tmp_table_field_from_field_type(table); + new_field= item->tmp_table_field_from_field_type(table, 1); else if (item->max_length/item->collation.collation->mbmaxlen > 255 && convert_blob_length) new_field= new Field_varstring(convert_blob_length, maybe_null, - item->name, table, + item->name, table->s, item->collation.collation); else new_field= item->make_string_field(table); break; case DECIMAL_RESULT: new_field= new Field_new_decimal(item->max_length, maybe_null, item->name, - table, item->decimals, item->unsigned_flag); + item->decimals, item->unsigned_flag); break; case ROW_RESULT: default: // This case should never be choosen DBUG_ASSERT(0); - new_field= 0; // to satisfy compiler (uninitialized variable) + new_field= 0; break; } + if (new_field) + new_field->init(table); + if (copy_func && item->is_result_field()) *((*copy_func)++) = item; // Save for copy_funcs if (modify_item) @@ -8242,14 +8295,20 @@ Field *create_tmp_field_for_schema(THD *thd, Item *item, TABLE *table) { if (item->field_type() == MYSQL_TYPE_VARCHAR) { + Field *field; if (item->max_length > MAX_FIELD_VARCHARLENGTH / item->collation.collation->mbmaxlen) - return new Field_blob(item->max_length, item->maybe_null, - item->name, table, item->collation.collation); - return new Field_varstring(item->max_length, item->maybe_null, item->name, - table, item->collation.collation); + field= new Field_blob(item->max_length, item->maybe_null, + item->name, item->collation.collation); + else + field= new Field_varstring(item->max_length, item->maybe_null, + item->name, + table->s, item->collation.collation); + if (field) + field->init(table); + return field; } - return item->tmp_table_field_from_field_type(table); + return item->tmp_table_field_from_field_type(table, 0); } @@ -8300,11 +8359,13 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, item= item->real_item(); type= Item::FIELD_ITEM; } + switch (type) { case Item::SUM_FUNC_ITEM: { Item_sum *item_sum=(Item_sum*) item; - Field *result= item_sum->create_tmp_field(group, table, convert_blob_length); + Field *result= item_sum->create_tmp_field(group, table, + convert_blob_length); if (!result) thd->fatal_error(); return result; @@ -8417,12 +8478,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, { MEM_ROOT *mem_root_save, own_root; TABLE *table; + TABLE_SHARE *share; uint i,field_count,null_count,null_pack_length; uint copy_func_count= param->func_count; uint hidden_null_count, hidden_null_pack_length, hidden_field_count; uint blob_count,group_null_items, string_count; uint temp_pool_slot=MY_BIT_NONE; - ulong reclength, string_total_length; + ulong reclength, 
string_total_length, fieldnr= 0; bool using_unique_constraint= 0; bool use_packed_rows= 0; bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS); @@ -8445,7 +8507,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) - temp_pool_slot = bitmap_set_next(&temp_pool); + temp_pool_slot = bitmap_lock_set_next(&temp_pool); if (temp_pool_slot != MY_BIT_NONE) // we got a slot sprintf(path, "%s_%lx_%i", tmp_file_prefix, @@ -8496,6 +8558,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, if (!multi_alloc_root(&own_root, &table, sizeof(*table), + &share, sizeof(*share), ®_field, sizeof(Field*) * (field_count+1), &blob_field, sizeof(uint)*(field_count+1), &from_field, sizeof(Field*)*field_count, @@ -8510,13 +8573,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, param->group_length : 0, NullS)) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count])) { - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); free_root(&own_root, MYF(0)); /* purecov: inspected */ DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -8544,19 +8607,17 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, table->used_keys.init(); table->keys_in_use_for_query.init(); - table->s= &table->share_not_to_be_used; - table->s->blob_field= blob_field; - table->s->table_name= table->s->path= tmpname; - table->s->db= ""; - table->s->blob_ptr_size= mi_portable_sizeof_char_ptr; - table->s->tmp_table= TMP_TABLE; - table->s->db_low_byte_first=1; // True for HEAP and MyISAM - table->s->table_charset= param->table_charset; - table->s->keys_for_keyread.init(); - table->s->keys_in_use.init(); + table->s= share; + init_tmp_table_share(share, "", 0, tmpname, tmpname); + share->blob_field= blob_field; + share->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->db_low_byte_first=1; // True for HEAP and MyISAM + share->table_charset= param->table_charset; + share->primary_key= MAX_KEY; // Indicate no primary key + share->keys_for_keyread.init(); + share->keys_in_use.init(); /* For easier error reporting */ - table->s->table_cache_key= (char*) (table->s->db= ""); - + share->table_cache_key= share->db; /* Calculate which type of fields we will store in the temporary table */ @@ -8631,6 +8692,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, (*argp)->maybe_null=1; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; } } } @@ -8682,6 +8744,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, new_field->flags|= GROUP_FLAG; } new_field->query_id= thd->query_id; + new_field->fieldnr= ++fieldnr; new_field->field_index= (uint) (reg_field - table->field); *(reg_field++) =new_field; } @@ -8703,14 +8766,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); field_count= (uint) (reg_field - table->field); *blob_field= 0; // End marker + share->fields= field_count; /* If result table is small; use a heap */ if (blob_count || using_unique_constraint || (select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) == 
OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM)) { - table->file= get_new_handler(table, &table->mem_root, - table->s->db_type= DB_TYPE_MYISAM); + table->file= get_new_handler(share, &table->mem_root, + share->db_type= &myisam_hton); if (group && (param->group_parts > table->file->max_key_parts() || param->group_length > table->file->max_key_length())) @@ -8718,14 +8782,16 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, } else { - table->file= get_new_handler(table, &table->mem_root, - table->s->db_type= DB_TYPE_HEAP); + table->file= get_new_handler(share, &table->mem_root, + share->db_type= &heap_hton); } + if (!table->file) + goto err; if (!using_unique_constraint) reclength+= group_null_items; // null flag is stored separately - table->s->blob_fields= blob_count; + share->blob_fields= blob_count; if (blob_count == 0) { /* We need to ensure that first byte is not 0 for the delete link */ @@ -8747,16 +8813,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)) use_packed_rows= 1; - table->s->fields= field_count; - table->s->reclength= reclength; + share->reclength= reclength; { uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1); - table->s->rec_buff_length= alloc_length; + share->rec_buff_length= alloc_length; if (!(table->record[0]= (byte*) alloc_root(&table->mem_root, alloc_length*3))) goto err; table->record[1]= table->record[0]+alloc_length; - table->s->default_values= table->record[1]+alloc_length; + share->default_values= table->record[1]+alloc_length; } copy_func[0]=0; // End marker @@ -8772,8 +8837,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bfill(null_flags,null_pack_length,255); // Set null fields table->null_flags= (uchar*) table->record[0]; - table->s->null_fields= null_count+ hidden_null_count; - table->s->null_bytes= null_pack_length; + share->null_fields= null_count+ hidden_null_count; + share->null_bytes= null_pack_length; } null_count= (blob_count == 0) ? 1 : 0; hidden_field_count=param->hidden_field_count; @@ -8846,13 +8911,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, store_record(table,s->default_values); // Make empty default record if (thd->variables.tmp_table_size == ~(ulong) 0) // No limit - table->s->max_rows= ~(ha_rows) 0; + share->max_rows= ~(ha_rows) 0; else - table->s->max_rows= (((table->s->db_type == DB_TYPE_HEAP) ? + share->max_rows= (((share->db_type == &heap_hton) ? 
min(thd->variables.tmp_table_size, thd->variables.max_heap_table_size) : - thd->variables.tmp_table_size)/ table->s->reclength); - set_if_bigger(table->s->max_rows,1); // For dummy start options + thd->variables.tmp_table_size)/ share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options keyinfo= param->keyinfo; if (group) @@ -8860,8 +8925,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, DBUG_PRINT("info",("Creating group key in temporary table")); table->group=group; /* Table is grouped by key */ param->group_buff=group_buff; - table->s->keys=1; - table->s->uniques= test(using_unique_constraint); + share->keys=1; + share->uniques= test(using_unique_constraint); table->key_info=keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; @@ -8929,14 +8994,14 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, null_pack_length-=hidden_null_pack_length; keyinfo->key_parts= ((field_count-param->hidden_field_count)+ test(null_pack_length)); - set_if_smaller(table->s->max_rows, rows_limit); + set_if_smaller(share->max_rows, rows_limit); param->end_write_records= rows_limit; table->distinct= 1; - table->s->keys= 1; + share->keys= 1; if (blob_count) { using_unique_constraint=1; - table->s->uniques= 1; + share->uniques= 1; } if (!(key_part_info= (KEY_PART_INFO*) alloc_root(&table->mem_root, @@ -8955,12 +9020,15 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, key_part_info->null_bit=0; key_part_info->offset=hidden_null_pack_length; key_part_info->length=null_pack_length; - key_part_info->field=new Field_string((char*) table->record[0], - (uint32) key_part_info->length, - (uchar*) 0, - (uint) 0, - Field::NONE, - NullS, table, &my_charset_bin); + key_part_info->field= new Field_string((char*) table->record[0], + (uint32) key_part_info->length, + (uchar*) 0, + (uint) 0, + Field::NONE, + NullS, &my_charset_bin); + if (!key_part_info->field) + goto err; + key_part_info->field->init(table); key_part_info->key_type=FIELDFLAG_BINARY; key_part_info->type= HA_KEYTYPE_BINARY; key_part_info++; @@ -8984,8 +9052,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, } if (thd->is_fatal_error) // If end of memory goto err; /* purecov: inspected */ - table->s->db_record_offset= 1; - if (table->s->db_type == DB_TYPE_MYISAM) + share->db_record_offset= 1; + if (share->db_type == &myisam_hton) { if (create_myisam_tmp_table(table,param,select_options)) goto err; @@ -8993,6 +9061,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, if (open_tmp_table(table)) goto err; + table->file->ha_set_all_bits_in_read_set(); + table->file->ha_set_all_bits_in_write_set(); thd->mem_root= mem_root_save; DBUG_RETURN(table); @@ -9000,7 +9070,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, err: thd->mem_root= mem_root_save; free_tmp_table(thd,table); /* purecov: inspected */ - bitmap_clear_bit(&temp_pool, temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); DBUG_RETURN(NULL); /* purecov: inspected */ } @@ -9017,7 +9087,7 @@ err: field_list list of column definitions DESCRIPTION - The created table doesn't have a table handler assotiated with + The created table doesn't have a table handler associated with it, has no keys, no group/distinct, no copy_funcs array. The sole purpose of this TABLE object is to use the power of Field class to read/write data to/from table->record[0]. 
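
A pattern recurs throughout these hunks and is worth calling out: Field objects are no longer constructed against a live TABLE. They are created bare (or against a TABLE_SHARE) and bound to their table afterwards with Field::init(table). A minimal sketch of the sequence; make_tmp_double is an illustrative wrapper, not a function in this patch:

    /* Two-phase construction as used above: create, NULL-check, then bind */
    static Field *make_tmp_double(Item *item, TABLE *table)
    {
      Field *new_field= new Field_double(item->max_length, item->maybe_null,
                                         item->name, item->decimals);
      if (new_field)              /* this code base checks new for NULL */
        new_field->init(table);   /* attach the share-less field to TABLE */
      return new_field;
    }
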
Then one can store @@ -9038,67 +9108,68 @@ TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list) uint record_length= 0; uint null_count= 0; /* number of columns which may be null */ uint null_pack_length; /* NULL representation array length */ - TABLE_SHARE *s; + TABLE_SHARE *share; /* Create the table and list of all fields */ - TABLE *table= (TABLE*) thd->calloc(sizeof(*table)); + TABLE *table= (TABLE*) thd->calloc(sizeof(*table)+sizeof(*share)); field= (Field**) thd->alloc((field_count + 1) * sizeof(Field*)); if (!table || !field) return 0; table->field= field; - table->s= s= &table->share_not_to_be_used; - s->fields= field_count; + table->s= share= (TABLE_SHARE*) (table+1); + share->fields= field_count; - if (!(s->blob_field= (uint*)thd->alloc((field_list.elements + 1) * - sizeof(uint)))) + if (!(share->blob_field= (uint*)thd->alloc((field_list.elements + 1) * + sizeof(uint)))) return 0; - s->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->blob_ptr_size= mi_portable_sizeof_char_ptr; /* Create all fields and calculate the total length of record */ List_iterator_fast<create_field> it(field_list); while ((cdef= it++)) { - *field= make_field(0, cdef->length, + *field= make_field(share, 0, cdef->length, (uchar*) (f_maybe_null(cdef->pack_flag) ? "" : 0), f_maybe_null(cdef->pack_flag) ? 1 : 0, cdef->pack_flag, cdef->sql_type, cdef->charset, cdef->geom_type, cdef->unireg_check, - cdef->interval, cdef->field_name, table); + cdef->interval, cdef->field_name); if (!*field) goto error; - record_length+= (**field).pack_length(); - if (! ((**field).flags & NOT_NULL_FLAG)) - ++null_count; + (*field)->init(table); + record_length+= (*field)->pack_length(); + if (! ((*field)->flags & NOT_NULL_FLAG)) + null_count++; if ((*field)->flags & BLOB_FLAG) - s->blob_field[blob_count++]= (uint) (field - table->field); + share->blob_field[blob_count++]= (uint) (field - table->field); - ++field; + field++; } *field= NULL; /* mark the end of the list */ - s->blob_field[blob_count]= 0; /* mark the end of the list */ - s->blob_fields= blob_count; + share->blob_field[blob_count]= 0; /* mark the end of the list */ + share->blob_fields= blob_count; null_pack_length= (null_count + 7)/8; - s->reclength= record_length + null_pack_length; - s->rec_buff_length= ALIGN_SIZE(s->reclength + 1); - table->record[0]= (byte*) thd->alloc(s->rec_buff_length); + share->reclength= record_length + null_pack_length; + share->rec_buff_length= ALIGN_SIZE(share->reclength + 1); + table->record[0]= (byte*) thd->alloc(share->rec_buff_length); if (!table->record[0]) goto error; if (null_pack_length) { table->null_flags= (uchar*) table->record[0]; - s->null_fields= null_count; - s->null_bytes= null_pack_length; + share->null_fields= null_count; + share->null_bytes= null_pack_length; } table->in_use= thd; /* field->reset() may access table->in_use */ { /* Set up field pointers */ byte *null_pos= table->record[0]; - byte *field_pos= null_pos + s->null_bytes; + byte *field_pos= null_pos + share->null_bytes; uint null_bit= 1; for (field= table->field; *field; ++field) @@ -9132,7 +9203,7 @@ error: static bool open_tmp_table(TABLE *table) { int error; - if ((error=table->file->ha_open(table->s->table_name,O_RDWR, + if ((error=table->file->ha_open(table, table->s->table_name.str,O_RDWR, HA_OPEN_TMP_TABLE))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ @@ -9151,9 +9222,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, MI_KEYDEF keydef; MI_UNIQUEDEF uniquedef; KEY 
*keyinfo=param->keyinfo; + TABLE_SHARE *share= table->s; DBUG_ENTER("create_myisam_tmp_table"); - if (table->s->keys) + if (share->keys) { // Get keys for ni_create bool using_unique_constraint=0; HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root, @@ -9164,11 +9236,11 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, bzero(seg, sizeof(*seg) * keyinfo->key_parts); if (keyinfo->key_length >= table->file->max_key_length() || keyinfo->key_parts > table->file->max_key_parts() || - table->s->uniques) + share->uniques) { /* Can't create a key; Make a unique constraint instead of a key */ - table->s->keys= 0; - table->s->uniques= 1; + share->keys= 0; + share->uniques= 1; using_unique_constraint=1; bzero((char*) &uniquedef,sizeof(uniquedef)); uniquedef.keysegs=keyinfo->key_parts; @@ -9180,7 +9252,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, param->recinfo->type= FIELD_CHECK; param->recinfo->length=MI_UNIQUE_HASH_LENGTH; param->recinfo++; - table->s->reclength+=MI_UNIQUE_HASH_LENGTH; + share->reclength+=MI_UNIQUE_HASH_LENGTH; } else { @@ -9202,7 +9274,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, seg->type= ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); - seg->bit_start= (uint8)(field->pack_length() - table->s->blob_ptr_size); + seg->bit_start= (uint8)(field->pack_length() - share->blob_ptr_size); seg->flag= HA_BLOB_PART; seg->length=0; // Whole blob in unique constraint } @@ -9235,10 +9307,10 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, OPTION_BIG_TABLES) create_info.data_file_length= ~(ulonglong) 0; - if ((error=mi_create(table->s->table_name,table->s->keys,&keydef, + if ((error=mi_create(share->table_name.str, share->keys, &keydef, (uint) (param->recinfo-param->start_recinfo), param->start_recinfo, - table->s->uniques, &uniquedef, + share->uniques, &uniquedef, &create_info, HA_CREATE_TMP_TABLE))) { @@ -9248,7 +9320,7 @@ static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM *param, } statistic_increment(table->in_use->status_var.created_tmp_disk_tables, &LOCK_status); - table->s->db_record_offset= 1; + share->db_record_offset= 1; DBUG_RETURN(0); err: DBUG_RETURN(1); @@ -9269,17 +9341,9 @@ free_tmp_table(THD *thd, TABLE *entry) if (entry->file) { if (entry->db_stat) - { - (void) entry->file->close(); - } - /* - We can't call ha_delete_table here as the table may created in mixed case - here and we have to ensure that delete_table gets the table name in - the original case. 
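
The branching above hides a simple rule: when the grouping key cannot be represented as a real MyISAM key, the temporary table keeps no key at all and duplicates are caught by a hashed unique constraint instead. A condensed restatement of the test, reusing the members visible in create_myisam_tmp_table(); the helper name is invented:

    static bool need_unique_fallback(TABLE_SHARE *share, handler *file,
                                     KEY *keyinfo)
    {
      return keyinfo->key_length >= file->max_key_length() ||
             keyinfo->key_parts  >  file->max_key_parts()  ||
             share->uniques;          /* a unique was already requested */
    }

When it holds, share->keys is reset to 0, share->uniques becomes 1, and every row is widened by an MI_UNIQUE_HASH_LENGTH-byte FIELD_CHECK column holding the hash that mi_create() uses to detect duplicates.
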
- */ - if (!(test_flags & TEST_KEEP_TMP_TABLES) || - entry->s->db_type == DB_TYPE_HEAP) - entry->file->delete_table(entry->s->table_name); + entry->file->drop_table(entry->s->table_name.str); + else + entry->file->delete_table(entry->s->table_name.str); delete entry->file; } @@ -9288,7 +9352,7 @@ free_tmp_table(THD *thd, TABLE *entry) (*ptr)->free(); free_io_cache(entry); - bitmap_clear_bit(&temp_pool, entry->temp_pool_slot); + bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot); free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd->proc_info=save_proc_info; @@ -9304,26 +9368,29 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, int error, bool ignore_last_dupp_key_error) { TABLE new_table; + TABLE_SHARE share; const char *save_proc_info; int write_err; DBUG_ENTER("create_myisam_from_heap"); - if (table->s->db_type != DB_TYPE_HEAP || error != HA_ERR_RECORD_FILE_FULL) + if (table->s->db_type != &heap_hton || + error != HA_ERR_RECORD_FILE_FULL) { table->file->print_error(error,MYF(0)); DBUG_RETURN(1); } new_table= *table; - new_table.s= &new_table.share_not_to_be_used; - new_table.s->db_type= DB_TYPE_MYISAM; - if (!(new_table.file= get_new_handler(&new_table, &new_table.mem_root, - DB_TYPE_MYISAM))) + share= *table->s; + new_table.s= &share; + new_table.s->db_type= &myisam_hton; + if (!(new_table.file= get_new_handler(&share, &new_table.mem_root, + &myisam_hton))) DBUG_RETURN(1); // End of memory save_proc_info=thd->proc_info; thd->proc_info="converting HEAP to MyISAM"; - if (create_myisam_tmp_table(&new_table,param, + if (create_myisam_tmp_table(&new_table, param, thd->lex->select_lex.options | thd->options)) goto err2; if (open_tmp_table(&new_table)) @@ -9350,14 +9417,19 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, new_table.file->extra(HA_EXTRA_WRITE_CACHE); #endif - /* copy all old rows */ + /* + copy all old rows from heap table to MyISAM table + This is the only code that uses record[1] to read/write but this + is safe as this is a temporary MyISAM table without timestamp/autoincrement + or partitioning. + */ while (!table->file->rnd_next(new_table.record[1])) { - if ((write_err=new_table.file->write_row(new_table.record[1]))) + if ((write_err=new_table.file->ha_write_row(new_table.record[1]))) goto err; } /* copy row that filled HEAP table */ - if ((write_err=new_table.file->write_row(table->record[0]))) + if ((write_err=new_table.file->ha_write_row(table->record[0]))) { if (write_err != HA_ERR_FOUND_DUPP_KEY && write_err != HA_ERR_FOUND_DUPP_UNIQUE || !ignore_last_dupp_key_error) @@ -9367,12 +9439,13 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, /* remove heap table and change to use myisam table */ (void) table->file->ha_rnd_end(); (void) table->file->close(); - (void) table->file->delete_table(table->s->table_name); + (void) table->file->delete_table(table->s->table_name.str); delete table->file; table->file=0; + new_table.s= table->s; // Keep old share *table= new_table; - table->s= &table->share_not_to_be_used; - table->file->change_table_ptr(table); + *table->s= share; + table->file->change_table_ptr(table, table->s); if (save_proc_info) thd->proc_info= (!strcmp(save_proc_info,"Copying to tmp table") ? 
"Copying to tmp table on disk" : save_proc_info); @@ -9384,9 +9457,9 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, (void) table->file->ha_rnd_end(); (void) new_table.file->close(); err1: - new_table.file->delete_table(new_table.s->table_name); - delete new_table.file; + new_table.file->delete_table(new_table.s->table_name.str); err2: + delete new_table.file; thd->proc_info=save_proc_info; DBUG_RETURN(1); } @@ -9491,7 +9564,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) empty_record(table); if (table->group && join->tmp_table_param.sum_func_count && table->s->keys && !table->file->inited) - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); } /* Set up select_end */ join->join_tab[join->tables-1].next_select= setup_end_select_func(join); @@ -9632,7 +9705,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) given the selected plan prescribes to nest retrievals of the joined tables in the following order: t1,t2,t3. A pushed down predicate are attached to the table which it pushed to, - at the field select_cond. + at the field join_tab->select_cond. When executing a nested loop of level k the function runs through the rows of 'join_tab' and for each row checks the pushed condition attached to the table. @@ -9671,7 +9744,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) is complemented by nulls for t2 and t3. Then the pushed down predicates are checked for the composed row almost in the same way as it had been done for the first row with a match. The only difference is - the predicates from on expressions are not checked. + the predicates from on expressions are not checked. IMPLEMENTATION The function forms output rows for a current partial join of k @@ -9680,7 +9753,7 @@ sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) join_tab it calls sub_select that builds all possible matching tails from the result set. To be able check predicates conditionally items of the class - Item_func_trig_cond are employed. + Item_func_trig_cond are employed. An object of this class is constructed from an item of class COND and a pointer to a guarding boolean variable. 
When the value of the guard variable is true the value of the object @@ -10195,7 +10268,7 @@ join_read_const(JOIN_TAB *tab) table->status= STATUS_NOT_FOUND; mark_as_null_row(tab->table); empty_record(table); - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; } @@ -10218,7 +10291,9 @@ join_read_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cmp_buffer_with_ref(tab) || (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW))) { @@ -10230,7 +10305,7 @@ join_read_key(JOIN_TAB *tab) error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT); - if (error && error != HA_ERR_KEY_NOT_FOUND) + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); } table->null_row=0; @@ -10250,14 +10325,16 @@ join_read_always_key(JOIN_TAB *tab) return -1; } if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + { + table->file->ha_index_init(tab->ref.key, tab->sorted); + } if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read(table->record[0], tab->ref.key_buff, tab->ref.key_length,HA_READ_KEY_EXACT))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10277,14 +10354,14 @@ join_read_last_key(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, tab->sorted); if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) return -1; if ((error=table->file->index_read_last(table->record[0], tab->ref.key_buff, tab->ref.key_length))) { - if (error != HA_ERR_KEY_NOT_FOUND) + if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_error(table, error); return -1; /* purecov: inspected */ } @@ -10387,7 +10464,7 @@ join_read_first(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, tab->sorted); if ((error=tab->table->file->index_first(tab->table->record[0]))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) @@ -10426,7 +10503,7 @@ join_read_last(JOIN_TAB *tab) tab->read_record.index=tab->index; tab->read_record.record=table->record[0]; if (!table->file->inited) - table->file->ha_index_init(tab->index); + table->file->ha_index_init(tab->index, 1); if ((error= tab->table->file->index_last(tab->table->record[0]))) return report_error(table, error); return 0; @@ -10450,7 +10527,7 @@ join_ft_read_first(JOIN_TAB *tab) TABLE *table= tab->table; if (!table->file->inited) - table->file->ha_index_init(tab->ref.key); + table->file->ha_index_init(tab->ref.key, 1); #if NOT_USED_YET if (cp_buffer_from_ref(tab->join->thd, &tab->ref)) // as ft-key doesn't use store_key's return -1; // see also FT_SELECT::init() @@ -10752,7 +10829,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { int error; join->found_records++; - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE) @@ 
-10814,8 +10891,8 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { /* Update old record */ restore_record(table,record[1]); update_tmptable_sum_func(join->sum_funcs,table); - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ @@ -10838,13 +10915,13 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } init_tmptable_sum_functions(join->sum_funcs); copy_funcs(join->tmp_table_param.items_to_copy); - if ((error=table->file->write_row(table->record[0]))) + if ((error=table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param, error, 0)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error /* Change method to update rows */ - table->file->ha_index_init(0); + table->file->ha_index_init(0, 0); join->join_tab[join->tables-1].next_select=end_unique_update; } join->send_records++; @@ -10874,7 +10951,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), copy_fields(&join->tmp_table_param); // Groups are copied twice. copy_funcs(join->tmp_table_param.items_to_copy); - if (!(error=table->file->write_row(table->record[0]))) + if (!(error=table->file->ha_write_row(table->record[0]))) join->send_records++; // New group else { @@ -10890,8 +10967,8 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), } restore_record(table,record[1]); update_tmptable_sum_func(join->sum_funcs,table); - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { table->file->print_error(error,MYF(0)); /* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ @@ -10934,7 +11011,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), join->sum_funcs_end[send_group_parts]); if (!join->having || join->having->val_int()) { - int error= table->file->write_row(table->record[0]); + int error= table->file->ha_write_row(table->record[0]); if (error && create_myisam_from_heap(join->thd, table, &join->tmp_table_param, error, 0)) @@ -11753,7 +11830,7 @@ remove_duplicates(JOIN *join, TABLE *entry,List<Item> &fields, Item *having) free_io_cache(entry); // Safety entry->file->info(HA_STATUS_VARIABLE); - if (entry->s->db_type == DB_TYPE_HEAP || + if (entry->s->db_type == &heap_hton || (!entry->s->blob_fields && ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * entry->file->records < thd->variables.sortbuff_size))) @@ -11802,7 +11879,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, } if (having && !having->val_int()) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; error=file->rnd_next(record); continue; @@ -11829,7 +11906,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, } if (compare_record(table, first_field) == 0) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; } else if (!found) @@ -11926,7 +12003,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table, } if (having && !having->val_int()) { - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; continue; } @@ -11943,7 +12020,7 @@ static int remove_dup_with_hash_index(THD 
*thd, TABLE *table, if (hash_search(&hash, org_key_pos, key_length)) { /* Duplicated found ; Remove the row */ - if ((error=file->delete_row(record))) + if ((error=file->ha_delete_row(record))) goto err; } else @@ -13152,7 +13229,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, for (i= 0; (item= it++); i++) { Field *field; - + if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) item_field= item; else @@ -13171,7 +13248,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array, DBUG_RETURN(TRUE); // Fatal error item_field->name= item->name; #ifndef DBUG_OFF - if (_db_on_ && !item_field->name) + if (!item_field->name) { char buff[256]; String str(buff,sizeof(buff),&my_charset_bin); @@ -13771,7 +13848,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table) item->save_in_result_field(1); } copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]); - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(thd, table, &tmp_table_param, error, 0)) @@ -13837,6 +13914,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join->select_lex->type), cs)); for (uint i=0 ; i < 7; i++) item_list.push_back(item_null); + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + item_list.push_back(item_null); + item_list.push_back(new Item_string(message,strlen(message),cs)); if (result->send_data(item_list)) join->error= 1; @@ -13957,7 +14037,28 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(table->alias, strlen(table->alias), cs)); - /* type */ + /* "partitions" column */ + if (join->thd->lex->describe & DESCRIBE_PARTITIONS) + { +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + if (!table->derived_select_number && + (part_info= table->part_info)) + { + char parts_buff[128]; + String parts_str(parts_buff,sizeof(parts_buff),cs); + make_used_partitions_str(part_info, &parts_str); + item_list.push_back(new Item_string(parts_str.ptr(), + parts_str.length(), cs)); + } + else + item_list.push_back(item_null); +#else + /* just produce empty column if partitioning is not compiled in */ + item_list.push_back(item_null); +#endif + } + /* "type" column */ item_list.push_back(new Item_string(join_type_str[tab->type], strlen(join_type_str[tab->type]), cs)); diff --git a/sql/sql_select.h b/sql/sql_select.h index 01ed8048e4a..459d2ff89a8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -134,6 +134,7 @@ typedef struct st_join_table { uint used_fields,used_fieldlength,used_blobs; enum join_type type; bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct; + bool sorted; TABLE_REF ref; JOIN_CACHE cache; JOIN *join; @@ -447,10 +448,11 @@ class store_key :public Sql_alloc { if (field_arg->type() == FIELD_TYPE_BLOB) { - /* Key segments are always packed with a 2 byte length prefix */ - to_field=new Field_varstring(ptr, length, 2, (uchar*) null, 1, - Field::NONE, field_arg->field_name, - field_arg->table, field_arg->charset()); + /* Key segments are always packed with a 2 byte length prefix */ + to_field= new Field_varstring(ptr, length, 2, (uchar*) null, 1, + Field::NONE, field_arg->field_name, + field_arg->table->s, field_arg->charset()); + to_field->init(field_arg->table); } else to_field=field_arg->new_key_field(thd->mem_root, field_arg->table, diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 8920efa87ab..9e41c541c4f 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ 
-19,14 +19,17 @@ #include "mysql_priv.h" #include "sql_select.h" // For select_describe +#include "sql_show.h" #include "repl_failsafe.h" #include "sp.h" #include "sp_head.h" #include "sql_trigger.h" +#include "authors.h" +#include "event.h" #include <my_dir.h> -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" // For berkeley_show_logs +#ifdef WITH_PARTITION_STORAGE_ENGINE +#include "ha_partition.h" #endif static const char *grant_names[]={ @@ -39,17 +42,36 @@ static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **), grant_names, NULL}; #endif -static int -store_create_info(THD *thd, TABLE_LIST *table_list, String *packet); -static int -view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); -static bool schema_table_store_record(THD *thd, TABLE *table); - - /*************************************************************************** ** List all table types supported ***************************************************************************/ +static my_bool show_handlerton(THD *thd, st_plugin_int *plugin, + void *arg) +{ + handlerton *default_type= (handlerton *) arg; + Protocol *protocol= thd->protocol; + handlerton *hton= (handlerton *) plugin->plugin->info; + + if (!(hton->flags & HTON_HIDDEN)) + { + protocol->prepare_for_resend(); + protocol->store(hton->name, system_charset_info); + const char *option_name= show_comp_option_name[(int) hton->state]; + + if (hton->state == SHOW_OPTION_YES && default_type == hton) + option_name= "DEFAULT"; + protocol->store(option_name, system_charset_info); + protocol->store(hton->comment, system_charset_info); + protocol->store(hton->commit ? "YES" : "NO", system_charset_info); + protocol->store(hton->prepare ? "YES" : "NO", system_charset_info); + protocol->store(hton->savepoint_set ? "YES" : "NO", system_charset_info); + + return protocol->write() ? 
1 : 0; + } + return 0; +} + bool mysqld_show_storage_engines(THD *thd) { List<Item> field_list; @@ -59,37 +81,151 @@ bool mysqld_show_storage_engines(THD *thd) field_list.push_back(new Item_empty_string("Engine",10)); field_list.push_back(new Item_empty_string("Support",10)); field_list.push_back(new Item_empty_string("Comment",80)); + field_list.push_back(new Item_empty_string("Transactions",3)); + field_list.push_back(new Item_empty_string("XA",3)); + field_list.push_back(new Item_empty_string("Savepoints",3)); if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_RETURN(TRUE); - const char *default_type_name= - ha_get_storage_engine((enum db_type)thd->variables.table_type); + if (plugin_foreach(thd, show_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, thd->variables.table_type)) + DBUG_RETURN(TRUE); - handlerton **types; - for (types= sys_table_types; *types; types++) + send_eof(thd); + DBUG_RETURN(FALSE); +} + +static int make_version_string(char *buf, int buf_length, uint version) +{ + return my_snprintf(buf, buf_length, "%d.%d", version>>8,version&0xff); +} + +static my_bool show_plugins(THD *thd, st_plugin_int *plugin, + void *arg) +{ + TABLE *table= (TABLE*) arg; + struct st_mysql_plugin *plug= plugin->plugin; + Protocol *protocol= thd->protocol; + CHARSET_INFO *cs= system_charset_info; + char version_buf[20]; + + restore_record(table, s->default_values); + + table->field[0]->store(plugin->name.str, plugin->name.length, cs); + + table->field[1]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), plug->version), + cs); + + + switch (plugin->state) { - if (!((*types)->flags & HTON_HIDDEN)) - { - protocol->prepare_for_resend(); - protocol->store((*types)->name, system_charset_info); - const char *option_name= show_comp_option_name[(int) (*types)->state]; - - if ((*types)->state == SHOW_OPTION_YES && - !my_strcasecmp(system_charset_info, default_type_name, (*types)->name)) - option_name= "DEFAULT"; - protocol->store(option_name, system_charset_info); - protocol->store((*types)->comment, system_charset_info); - if (protocol->write()) - DBUG_RETURN(TRUE); - } + /* case PLUGIN_IS_FREED: does not happen */ + case PLUGIN_IS_DELETED: + table->field[2]->store(STRING_WITH_LEN("DELETED"), cs); + break; + case PLUGIN_IS_UNINITIALIZED: + table->field[2]->store(STRING_WITH_LEN("INACTIVE"), cs); + break; + case PLUGIN_IS_READY: + table->field[2]->store(STRING_WITH_LEN("ACTIVE"), cs); + break; + default: + DBUG_ASSERT(0); + } + + table->field[3]->store(plugin_type_names[plug->type].str, + plugin_type_names[plug->type].length, + cs); + table->field[4]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + *(uint *)plug->info), cs); + + if (plugin->plugin_dl) + { + table->field[5]->store(plugin->plugin_dl->dl.str, + plugin->plugin_dl->dl.length, cs); + table->field[5]->set_notnull(); + table->field[6]->store(version_buf, + make_version_string(version_buf, sizeof(version_buf), + plugin->plugin_dl->version), + cs); + table->field[6]->set_notnull(); + } + else + { + table->field[5]->set_null(); + table->field[6]->set_null(); + } + + + if (plug->author) + { + table->field[7]->store(plug->author, strlen(plug->author), cs); + table->field[7]->set_notnull(); + } + else + table->field[7]->set_null(); + + if (plug->descr) + { + table->field[8]->store(plug->descr, strlen(plug->descr), cs); + table->field[8]->set_notnull(); + } + else + table->field[8]->set_null(); + + return schema_table_store_record(thd, table); +} + + +int 
fill_plugins(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_plugins"); + TABLE *table= tables->table; + + if (plugin_foreach(thd, show_plugins, MYSQL_ANY_PLUGIN, table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/*************************************************************************** +** List all Authors. +** If you can update it, you get to be in it :) +***************************************************************************/ + +bool mysqld_show_authors(THD *thd) +{ + List<Item> field_list; + Protocol *protocol= thd->protocol; + DBUG_ENTER("mysqld_show_authors"); + + field_list.push_back(new Item_empty_string("Name",40)); + field_list.push_back(new Item_empty_string("Location",40)); + field_list.push_back(new Item_empty_string("Comment",80)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + show_table_authors_st *authors; + for (authors= show_table_authors; authors->name; authors++) + { + protocol->prepare_for_resend(); + protocol->store(authors->name, system_charset_info); + protocol->store(authors->location, system_charset_info); + protocol->store(authors->comment, system_charset_info); + if (protocol->write()) + DBUG_RETURN(TRUE); } send_eof(thd); DBUG_RETURN(FALSE); } - /*************************************************************************** List all privileges supported ***************************************************************************/ @@ -111,6 +247,7 @@ static struct show_privileges_st sys_privileges[]= {"Create user", "Server Admin", "To create new users"}, {"Delete", "Tables", "To delete existing rows"}, {"Drop", "Databases,Tables", "To drop databases, tables, and views"}, + {"Event","Server Admin","To create, alter, drop and execute events"}, {"Execute", "Functions,Procedures", "To execute stored routines"}, {"File", "File access on server", "To read and write files on the server"}, {"Grant option", "Databases,Tables,Functions,Procedures", "To give to other users those privileges you possess"}, @@ -127,6 +264,7 @@ static struct show_privileges_st sys_privileges[]= {"Show view","Tables","To see views with SHOW CREATE VIEW"}, {"Shutdown","Server Admin", "To shut down the server"}, {"Super","Server Admin","To use KILL thread, SET GLOBAL, CHANGE MASTER, etc."}, + {"Trigger","Tables", "To use triggers"}, {"Update", "Tables", "To update existing rows"}, {"Usage","Server Admin","No privileges - allow connect only"}, {NullS, NullS, NullS} @@ -278,9 +416,14 @@ mysql_find_files(THD *thd,List<char> *files, const char *db,const char *path, for (i=0 ; i < (uint) dirp->number_off_files ; i++) { + char uname[NAME_LEN*3+1]; /* Unencoded name */ file=dirp->dir_entry+i; if (dir) { /* Return databases */ + if ((file->name[0] == '.' && + ((file->name[1] == '.' && file->name[2] == '\0') || + file->name[1] == '\0'))) + continue; /* . or .. */ #ifdef USE_SYMDIR char *ext; char buff[FN_REFLEN]; @@ -297,17 +440,22 @@ mysql_find_files(THD *thd,List<char> *files, const char *db,const char *path, continue; } #endif - if (file->name[0] == '.' || !MY_S_ISDIR(file->mystat->st_mode) || - (wild && wild_compare(file->name,wild,0))) - continue; + if (!MY_S_ISDIR(file->mystat->st_mode)) + continue; + VOID(filename_to_tablename(file->name, uname, sizeof(uname))); + if (wild && wild_compare(uname, wild, 0)) + continue; + file->name= uname; } else { // Return only .frm files which aren't temp files. 
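
One subtlety in the listing loop above: directory entries are decoded from their filesystem-safe encoding with filename_to_tablename() before the wildcard is applied, so a pattern is matched against the logical database name, never the encoded one. The database branch, condensed into an illustrative helper (database_entry_matches is invented; FILEINFO, wild_compare() and the rest are those used by mysql_find_files(); the USE_SYMDIR handling is omitted):

    static bool database_entry_matches(FILEINFO *file, const char *wild,
                                       char *uname, uint uname_size)
    {
      if (file->name[0] == '.' &&
          (file->name[1] == '\0' ||
           (file->name[1] == '.' && file->name[2] == '\0')))
        return FALSE;                        /* skip . and .. */
      if (!MY_S_ISDIR(file->mystat->st_mode))
        return FALSE;                        /* databases are directories */
      VOID(filename_to_tablename(file->name, uname, uname_size));
      return !(wild && wild_compare(uname, wild, 0));
    }
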
- if (my_strcasecmp(system_charset_info, ext=fn_ext(file->name),reg_ext) || + if (my_strcasecmp(system_charset_info, ext=fn_rext(file->name),reg_ext) || is_prefix(file->name,tmp_file_prefix)) continue; *ext=0; + VOID(filename_to_tablename(file->name, uname, sizeof(uname))); + file->name= uname; if (wild) { if (lower_case_table_names) @@ -392,7 +540,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) buffer.length(0); if ((table_list->view ? view_store_create_info(thd, table_list, &buffer) : - store_create_info(thd, table_list, &buffer))) + store_create_info(thd, table_list, &buffer, NULL))) DBUG_RETURN(TRUE); List<Item> field_list; @@ -449,12 +597,6 @@ bool mysqld_show_create_db(THD *thd, char *dbname, Protocol *protocol=thd->protocol; DBUG_ENTER("mysql_show_create_db"); - if (check_db_name(dbname)) - { - my_error(ER_WRONG_DB_NAME, MYF(0), dbname); - DBUG_RETURN(TRUE); - } - #ifndef NO_EMBEDDED_ACCESS_CHECKS if (test_all_bits(sctx->master_access, DB_ACLS)) db_access=DB_ACLS; @@ -465,8 +607,8 @@ bool mysqld_show_create_db(THD *thd, char *dbname, { my_error(ER_DBACCESS_DENIED_ERROR, MYF(0), sctx->priv_user, sctx->host_or_ip, dbname); - mysql_log.write(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR), - sctx->priv_user, sctx->host_or_ip, dbname); + general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR), + sctx->priv_user, sctx->host_or_ip, dbname); DBUG_RETURN(TRUE); } #endif @@ -478,8 +620,7 @@ bool mysqld_show_create_db(THD *thd, char *dbname, } else { - (void) sprintf(path,"%s/%s",mysql_data_home, dbname); - length=unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), dbname, "", ""); found_libchar= 0; if (length && path[length-1] == FN_LIBCHAR) { @@ -532,29 +673,6 @@ bool mysqld_show_create_db(THD *thd, char *dbname, DBUG_RETURN(FALSE); } -bool -mysqld_show_logs(THD *thd) -{ - List<Item> field_list; - Protocol *protocol= thd->protocol; - DBUG_ENTER("mysqld_show_logs"); - - field_list.push_back(new Item_empty_string("File",FN_REFLEN)); - field_list.push_back(new Item_empty_string("Type",10)); - field_list.push_back(new Item_empty_string("Status",10)); - - if (protocol->send_fields(&field_list, - Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) - DBUG_RETURN(TRUE); - -#ifdef HAVE_BERKELEY_DB - if ((have_berkeley_db == SHOW_OPTION_YES) && berkeley_show_logs(protocol)) - DBUG_RETURN(TRUE); -#endif - - send_eof(thd); - DBUG_RETURN(FALSE); -} /**************************************************************************** @@ -597,10 +715,10 @@ mysqld_dump_create_info(THD *thd, TABLE_LIST *table_list, int fd) Protocol *protocol= thd->protocol; String *packet= protocol->storage_packet(); DBUG_ENTER("mysqld_dump_create_info"); - DBUG_PRINT("enter",("table: %s",table_list->table->s->table_name)); + DBUG_PRINT("enter",("table: %s",table_list->table->s->table_name.str)); protocol->prepare_for_resend(); - if (store_create_info(thd, table_list, packet)) + if (store_create_info(thd, table_list, packet, NULL)) DBUG_RETURN(-1); if (fd < 0) @@ -753,11 +871,34 @@ static void append_directory(THD *thd, String *packet, const char *dir_type, #define LIST_PROCESS_HOST_LEN 64 -static int -store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) +/* + Build a CREATE TABLE statement for a table. + + SYNOPSIS + store_create_info() + thd The thread + table_list A list containing one table to write statement + for. + packet Pointer to a string where statement will be + written. 
+ create_info_arg Pointer to create information that can be used + to tailor the format of the statement. Can be + NULL, in which case only SQL_MODE is considered + when building the statement. + + NOTE + Currently always return 0, but might return error code in the + future. + + RETURN + 0 OK + */ +int +store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, + HA_CREATE_INFO *create_info_arg) { List<Item> field_list; - char tmp[MAX_FIELD_WIDTH], *for_str, buff[128], *end; + char tmp[MAX_FIELD_WIDTH], *for_str, buff[128], *end, uname[NAME_LEN*3+1]; const char *alias; String type(tmp, sizeof(tmp), system_charset_info); Field **ptr,*field; @@ -777,7 +918,7 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) (MODE_NO_FIELD_OPTIONS | MODE_MYSQL323 | MODE_MYSQL40)) != 0; DBUG_ENTER("store_create_info"); - DBUG_PRINT("enter",("table: %s", table->s->table_name)); + DBUG_PRINT("enter",("table: %s", table->s->table_name.str)); restore_record(table, s->default_values); // Get empty record @@ -788,8 +929,14 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) if (table_list->schema_table) alias= table_list->schema_table->table_name; else - alias= (lower_case_table_names == 2 ? table->alias : - share->table_name); + { + if (lower_case_table_names == 2) + alias= table->alias; + else + { + alias= share->table_name.str; + } + } append_identifier(thd, packet, alias, strlen(alias)); packet->append(STRING_WITH_LEN(" (\n")); @@ -914,15 +1061,20 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) if (i == primary_key && !strcmp(key_info->name, primary_key_name)) { found_primary=1; - packet->append(STRING_WITH_LEN("PRIMARY ")); + /* + No space at end, because a space will be added after where the + identifier would go, but that is not added for primary key. 
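
The reworked branches fold the KEY keyword into each prefix, so the clauses printed by SHOW CREATE TABLE come out as below (key and column names are invented for illustration; note that PRIMARY KEY alone carries no key name, which is why its branch appends no trailing space):

    PRIMARY KEY (`id`),
    UNIQUE KEY `uk_name` (`name`),
    FULLTEXT KEY `ft_body` (`body`),
    SPATIAL KEY `sp_loc` (`loc`),
    KEY `idx_age` (`age`)
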
+ */ + packet->append(STRING_WITH_LEN("PRIMARY KEY")); } else if (key_info->flags & HA_NOSAME) - packet->append(STRING_WITH_LEN("UNIQUE ")); + packet->append(STRING_WITH_LEN("UNIQUE KEY ")); else if (key_info->flags & HA_FULLTEXT) - packet->append(STRING_WITH_LEN("FULLTEXT ")); + packet->append(STRING_WITH_LEN("FULLTEXT KEY ")); else if (key_info->flags & HA_SPATIAL) - packet->append(STRING_WITH_LEN("SPATIAL ")); - packet->append(STRING_WITH_LEN("KEY ")); + packet->append(STRING_WITH_LEN("SPATIAL KEY ")); + else + packet->append(STRING_WITH_LEN("KEY ")); if (!found_primary) append_identifier(thd, packet, key_info->name, strlen(key_info->name)); @@ -967,6 +1119,12 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) } } packet->append(')'); + if (key_info->parser) + { + packet->append(" WITH PARSER ", 13); + append_identifier(thd, packet, key_info->parser->name.str, + key_info->parser->name.length); + } } /* @@ -983,22 +1141,59 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) packet->append(STRING_WITH_LEN("\n)")); if (!(thd->variables.sql_mode & MODE_NO_TABLE_OPTIONS) && !foreign_db_mode) { - if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) - packet->append(STRING_WITH_LEN(" TYPE=")); + /* + Get possible table space definitions and append them + to the CREATE TABLE statement + */ + + if ((for_str= file->get_tablespace_name(thd))) + { + packet->append(" TABLESPACE "); + packet->append(for_str, strlen(for_str)); + packet->append(" STORAGE DISK"); + my_free(for_str, MYF(0)); + } + + /* + IF check_create_info + THEN add ENGINE only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_ENGINE)) + { + if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40)) + packet->append(STRING_WITH_LEN(" TYPE=")); + else + packet->append(STRING_WITH_LEN(" ENGINE=")); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + packet->append(ha_resolve_storage_engine_name( + table->part_info->default_engine_type)); else - packet->append(STRING_WITH_LEN(" ENGINE=")); - packet->append(file->table_type()); + packet->append(file->table_type()); +#else + packet->append(file->table_type()); +#endif + } if (share->table_charset && !(thd->variables.sql_mode & MODE_MYSQL323) && !(thd->variables.sql_mode & MODE_MYSQL40)) { - packet->append(STRING_WITH_LEN(" DEFAULT CHARSET=")); - packet->append(share->table_charset->csname); - if (!(share->table_charset->state & MY_CS_PRIMARY)) + /* + IF check_create_info + THEN add DEFAULT CHARSET only if it was used when creating the table + */ + if (!create_info_arg || + (create_info_arg->used_fields & HA_CREATE_USED_DEFAULT_CHARSET)) { - packet->append(STRING_WITH_LEN(" COLLATE=")); - packet->append(table->s->table_charset->name); + packet->append(STRING_WITH_LEN(" DEFAULT CHARSET=")); + packet->append(share->table_charset->csname); + if (!(share->table_charset->state & MY_CS_PRIMARY)) + { + packet->append(STRING_WITH_LEN(" COLLATE=")); + packet->append(table->s->table_charset->name); + } } } @@ -1047,18 +1242,26 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet) packet->append(STRING_WITH_LEN(" CONNECTION=")); append_unescaped(packet, share->connect_string.str, share->connect_string.length); } - if (file->raid_type) - { - uint length; - length= my_snprintf(buff,sizeof(buff), - " RAID_TYPE=%s RAID_CHUNKS=%d RAID_CHUNKSIZE=%ld", - my_raid_type(file->raid_type), file->raid_chunks, - file->raid_chunksize/RAID_BLOCK_SIZE); - 
packet->append(buff, length); - } append_directory(thd, packet, "DATA", create_info.data_file_name); append_directory(thd, packet, "INDEX", create_info.index_file_name); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + /* + Partition syntax for CREATE TABLE is at the end of the syntax. + */ + uint part_syntax_len; + char *part_syntax; + if (table->part_info && + ((part_syntax= generate_partition_syntax(table->part_info, + &part_syntax_len, + FALSE,FALSE)))) + { + packet->append(part_syntax, part_syntax_len); + my_free(part_syntax, MYF(0)); + } + } +#endif DBUG_RETURN(0); } @@ -1086,7 +1289,6 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff) buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER ")); } - /* Append DEFINER clause to the given buffer. @@ -1109,7 +1311,7 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user, } -static int +int view_store_create_info(THD *thd, TABLE_LIST *table, String *buff) { my_bool foreign_db_mode= (thd->variables.sql_mode & (MODE_POSTGRESQL | @@ -1311,7 +1513,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose) if (thd_info->proc_info) protocol->store(thd_info->proc_info, system_charset_info); else - protocol->store(command_name[thd_info->command], system_charset_info); + protocol->store(command_name[thd_info->command].str, system_charset_info); if (thd_info->start_time) protocol->store((uint32) (now - thd_info->start_time)); else @@ -1325,46 +1527,324 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose) DBUG_VOID_RETURN; } +int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) +{ + TABLE *table= tables->table; + CHARSET_INFO *cs= system_charset_info; + char *user; + bool verbose; + ulong max_query_length; + time_t now= time(0); + DBUG_ENTER("fill_process_list"); + + user= thd->security_ctx->master_access & PROCESS_ACL ? + NullS : thd->security_ctx->priv_user; + verbose= thd->lex->verbose; + max_query_length= PROCESS_LIST_WIDTH; + + VOID(pthread_mutex_lock(&LOCK_thread_count)); + + if (!thd->killed) + { + I_List_iterator<THD> it(threads); + THD* tmp; + + while ((tmp= it++)) + { + Security_context *tmp_sctx= tmp->security_ctx; + struct st_my_thread_var *mysys_var; + const char *val; + + if ((!tmp->vio_ok() && !tmp->system_thread) || + (user && (!tmp_sctx->user || strcmp(tmp_sctx->user, user)))) + continue; + + restore_record(table, s->default_values); + /* ID */ + table->field[0]->store((longlong) tmp->thread_id, TRUE); + /* USER */ + val= tmp_sctx->user ? tmp_sctx->user : + (tmp->system_thread ? "system user" : "unauthenticated user"); + table->field[1]->store(val, strlen(val), cs); + /* HOST */ + if (tmp->peer_port && (tmp_sctx->host || tmp_sctx->ip) && + thd->security_ctx->host_or_ip[0]) + { + char host[LIST_PROCESS_HOST_LEN + 1]; + my_snprintf(host, LIST_PROCESS_HOST_LEN, "%s:%u", + tmp_sctx->host_or_ip, tmp->peer_port); + table->field[2]->store(host, strlen(host), cs); + } + else + table->field[2]->store(tmp_sctx->host_or_ip, + strlen(tmp_sctx->host_or_ip), cs); + /* DB */ + if (tmp->db) + { + table->field[3]->store(tmp->db, strlen(tmp->db), cs); + table->field[3]->set_notnull(); + } + + if ((mysys_var= tmp->mysys_var)) + pthread_mutex_lock(&mysys_var->mutex); + /* COMMAND */ + if ((val= (char *) (tmp->killed == THD::KILL_CONNECTION? "Killed" : 0))) + table->field[4]->store(val, strlen(val), cs); + else + table->field[4]->store(command_name[tmp->command].str, + command_name[tmp->command].length, cs); + /* TIME */ + table->field[5]->store((uint32)(tmp->start_time ? 
+ now - tmp->start_time : 0), TRUE); + /* STATE */ +#ifndef EMBEDDED_LIBRARY + val= (char*) (tmp->locked ? "Locked" : + tmp->net.reading_or_writing ? + (tmp->net.reading_or_writing == 2 ? + "Writing to net" : + tmp->command == COM_SLEEP ? "" : + "Reading from net") : + tmp->proc_info ? tmp->proc_info : + tmp->mysys_var && + tmp->mysys_var->current_cond ? + "Waiting on cond" : NullS); +#else + val= (char *) "Writing to net"; +#endif + if (val) + { + table->field[6]->store(val, strlen(val), cs); + table->field[6]->set_notnull(); + } + + if (mysys_var) + pthread_mutex_unlock(&mysys_var->mutex); + + /* INFO */ + if (tmp->query) + { + table->field[7]->store(tmp->query, + min(max_query_length, tmp->query_length), cs); + table->field[7]->set_notnull(); + } + + if (schema_table_store_record(thd, table)) + { + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + DBUG_RETURN(1); + } + } + } + + VOID(pthread_mutex_unlock(&LOCK_thread_count)); + DBUG_RETURN(0); +} + /***************************************************************************** Status functions *****************************************************************************/ +static DYNAMIC_ARRAY all_status_vars; +static bool status_vars_inited= 0; +static int show_var_cmp(const void *var1, const void *var2) +{ + return strcmp(((SHOW_VAR*)var1)->name, ((SHOW_VAR*)var2)->name); +} + +/* + deletes all the SHOW_UNDEF elements from the array and calls + delete_dynamic() if it's completely empty. +*/ +static void shrink_var_array(DYNAMIC_ARRAY *array) +{ + uint a,b; + SHOW_VAR *all= dynamic_element(array, 0, SHOW_VAR *); + + for (a= b= 0; b < array->elements; b++) + if (all[b].type != SHOW_UNDEF) + all[a++]= all[b]; + if (a) + { + bzero(all+a, sizeof(SHOW_VAR)); // writing NULL-element to the end + array->elements= a; + } + else // array is completely empty - delete it + delete_dynamic(array); +} + +/* + Adds an array of SHOW_VAR entries to the output of SHOW STATUS + + SYNOPSIS + add_status_vars(SHOW_VAR *list) + list - an array of SHOW_VAR entries to add to all_status_vars + the last entry must be {0,0,SHOW_UNDEF} + + NOTE + The handling of all_status_vars[] is completely internal, it's allocated + automatically when something is added to it, and deleted completely when + the last entry is removed. + + As a special optimization, if add_status_vars() is called before + init_status_vars(), it assumes "startup mode" - neither concurrent access + to the array nor SHOW STATUS is possible (thus it skips locks and qsort) + + The last entry of the all_status_vars[] should always be {0,0,SHOW_UNDEF} +*/ +int add_status_vars(SHOW_VAR *list) +{ + int res= 0; + if (status_vars_inited) + pthread_mutex_lock(&LOCK_status); + if (!all_status_vars.buffer && // array is not allocated yet - do it now + my_init_dynamic_array(&all_status_vars, sizeof(SHOW_VAR), 200, 20)) + { + res= 1; + goto err; + } + while (list->name) + res|= insert_dynamic(&all_status_vars, (gptr)list++); + res|= insert_dynamic(&all_status_vars, (gptr)list); // appending NULL-element + all_status_vars.elements--; // but next insert_dynamic should overwrite it + if (status_vars_inited) + sort_dynamic(&all_status_vars, show_var_cmp); +err: + if (status_vars_inited) + pthread_mutex_unlock(&LOCK_status); + return res; +} + +/* + Make all_status_vars[] usable for SHOW STATUS + + NOTE + See add_status_vars(). Before init_status_vars() call, add_status_vars() + works in a special fast "startup" mode. Thus init_status_vars() + should be called as late as possible but before enabling multi-threading.
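
Taken together, add_status_vars(), init_status_vars(), free_status_vars() and remove_status_vars() give a plugin a complete lifecycle for its counters. A usage sketch under stated assumptions: the Example_* names and the init/deinit hooks are invented here, only the registration calls come from this patch:

    static long example_hits= 0;              /* hypothetical counter */

    static SHOW_VAR example_status[]=
    {
      {"Example_hits", (char*) &example_hits, SHOW_LONG},
      {0, 0, SHOW_UNDEF}                      /* required terminator */
    };

    static int example_plugin_init(void *)
    {
      /* before init_status_vars() this runs in lock-free startup mode */
      return add_status_vars(example_status);
    }

    static int example_plugin_deinit(void *)
    {
      remove_status_vars(example_status);     /* drop the entries again */
      return 0;
    }
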
+*/ +void init_status_vars() +{ + status_vars_inited=1; + sort_dynamic(&all_status_vars, show_var_cmp); +} + +/* + catch-all cleanup function, cleans up everything no matter what + + DESCRIPTION + This function is not strictly required if all add_to_status/ + remove_status_vars are properly paired, but it's a safety measure that + deletes everything from the all_status_vars[] even if some + remove_status_vars were forgotten +*/ +void free_status_vars() +{ + delete_dynamic(&all_status_vars); +} + +/* + Removes an array of SHOW_VAR entries from the output of SHOW STATUS + + SYNOPSIS + remove_status_vars(SHOW_VAR *list) + list - an array of SHOW_VAR entries to remove from all_status_vars + the last entry must be {0,0,SHOW_UNDEF} + + NOTE + there's lots of room for optimizing this, especially in non-sorted mode, + but nobody cares - it may be called only in case of failed plugin + initialization in the mysqld startup. + +*/ +void remove_status_vars(SHOW_VAR *list) +{ + if (status_vars_inited) + { + pthread_mutex_lock(&LOCK_status); + SHOW_VAR *all= dynamic_element(&all_status_vars, 0, SHOW_VAR *); + int a= 0, b= all_status_vars.elements, c= (a+b)/2, res; + + for (; list->name; list++) + { + for (a= 0, b= all_status_vars.elements; b-a > 1; c= (a+b)/2) + { + res= show_var_cmp(list, all+c); + if (res < 0) + b= c; + else if (res > 0) + a= c; + else break; + } + if (res == 0) + all[c].type= SHOW_UNDEF; + } + shrink_var_array(&all_status_vars); + pthread_mutex_unlock(&LOCK_status); + } + else + { + SHOW_VAR *all= dynamic_element(&all_status_vars, 0, SHOW_VAR *); + int i; + for (; list->name; list++) + { + for (i= 0; i < all_status_vars.elements; i++) + { + if (show_var_cmp(list, all+i)) + continue; + all[i].type= SHOW_UNDEF; + break; + } + } + shrink_var_array(&all_status_vars); + } +} static bool show_status_array(THD *thd, const char *wild, - show_var_st *variables, + SHOW_VAR *variables, enum enum_var_type value_type, struct system_status_var *status_var, const char *prefix, TABLE *table) { - char buff[1024], *prefix_end; + char buff[SHOW_VAR_FUNC_BUFF_SIZE], *prefix_end; /* the variable name should not be longer than 80 characters */ char name_buffer[80]; int len; LEX_STRING null_lex_str; + SHOW_VAR tmp, *var; DBUG_ENTER("show_status_array"); null_lex_str.str= 0; // For sys_var->value_ptr() null_lex_str.length= 0; prefix_end=strnmov(name_buffer, prefix, sizeof(name_buffer)-1); + if (*prefix) + *prefix_end++= '_'; len=name_buffer + sizeof(name_buffer) - prefix_end; for (; variables->name; variables++) { strnmov(prefix_end, variables->name, len); name_buffer[sizeof(name_buffer)-1]=0; /* Safety */ - SHOW_TYPE show_type=variables->type; - if (show_type == SHOW_VARS) + + /* + if var->type is SHOW_FUNC, call the function.
+ Repeat as necessary, if new var is again SHOW_FUNC + */ + for (var=variables; var->type == SHOW_FUNC; var= &tmp) + ((mysql_show_var_func)(var->value))(thd, &tmp, buff); + + SHOW_TYPE show_type=var->type; + if (show_type == SHOW_ARRAY) { - show_status_array(thd, wild, (show_var_st *) variables->value, - value_type, status_var, variables->name, table); + show_status_array(thd, wild, (SHOW_VAR *) var->value, + value_type, status_var, name_buffer, table); } else { if (!(wild && wild[0] && wild_case_compare(system_charset_info, name_buffer, wild))) { - char *value=variables->value; + char *value=var->value; const char *pos, *end; // We assign a lot of const's long nr; if (show_type == SHOW_SYS) @@ -1375,13 +1855,22 @@ static bool show_status_array(THD *thd, const char *wild, } pos= end= buff; + /* + note that value may be == buff. All SHOW_xxx code below + should still work in this case + */ switch (show_type) { + case SHOW_DOUBLE_STATUS: + { + value= ((char *) status_var + (ulong) value); + end= buff + sprintf(buff, "%f", *(double*) value); + break; + } case SHOW_LONG_STATUS: - case SHOW_LONG_CONST_STATUS: value= ((char *) status_var + (ulong) value); /* fall through */ case SHOW_LONG: - case SHOW_LONG_CONST: + case SHOW_LONG_NOFLUSH: // the difference lies in refresh_status() end= int10_to_str(*(long*) value, buff, 10); break; case SHOW_LONGLONG: @@ -1396,7 +1885,6 @@ static bool show_status_array(THD *thd, const char *wild, case SHOW_MY_BOOL: end= strmov(buff, *(my_bool*) value ? "ON" : "OFF"); break; - case SHOW_INT_CONST: case SHOW_INT: end= int10_to_str((long) *(uint32*) value, buff, 10); break; @@ -1414,77 +1902,6 @@ static bool show_status_array(THD *thd, const char *wild, end= strend(pos); break; } - case SHOW_STARTTIME: - nr= (long) (thd->query_start() - start_time); - end= int10_to_str(nr, buff, 10); - break; - case SHOW_QUESTION: - end= int10_to_str((long) thd->query_id, buff, 10); - break; -#ifdef HAVE_REPLICATION - case SHOW_RPL_STATUS: - end= strmov(buff, rpl_status_type[(int)rpl_status]); - break; - case SHOW_SLAVE_RUNNING: - { - pthread_mutex_lock(&LOCK_active_mi); - end= strmov(buff, (active_mi && active_mi->slave_running && - active_mi->rli.slave_running) ? "ON" : "OFF"); - pthread_mutex_unlock(&LOCK_active_mi); - break; - } - case SHOW_SLAVE_RETRIED_TRANS: - { - /* - TODO: in 5.1 with multimaster, have one such counter per line in - SHOW SLAVE STATUS, and have the sum over all lines here. 
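
The cases being removed here (SHOW_STARTTIME and the replication probes above, the SSL counters just below) migrate to exactly this mechanism: instead of a switch case compiled into show_status_array(), each becomes a SHOW_FUNC callback that fills in a SHOW_VAR on demand. A sketch of the shape such a callback takes; show_example_uptime and its registration are illustrative, only the calling convention is taken from the loop above:

    /* buff is SHOW_VAR_FUNC_BUFF_SIZE bytes of scratch space owned by
       show_status_array(); the callback picks the real type and points
       value at its result, here mirroring the removed SHOW_STARTTIME case. */
    static int show_example_uptime(THD *thd, SHOW_VAR *var, char *buff)
    {
      var->type= SHOW_LONG;
      var->value= buff;
      *((long*) buff)= (long) (thd->query_start() - start_time);
      return 0;
    }

    /* Registered like any fixed entry; the cast matches the call site */
    static SHOW_VAR example_uptime_var=
      {"Example_uptime", (char*) show_example_uptime, SHOW_FUNC};
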
- */ - pthread_mutex_lock(&LOCK_active_mi); - if (active_mi) - { - pthread_mutex_lock(&active_mi->rli.data_lock); - end= int10_to_str(active_mi->rli.retried_trans, buff, 10); - pthread_mutex_unlock(&active_mi->rli.data_lock); - } - pthread_mutex_unlock(&LOCK_active_mi); - break; - } - case SHOW_SLAVE_SKIP_ERRORS: - { - MY_BITMAP *bitmap= (MY_BITMAP *)value; - if (!use_slave_mask || bitmap_is_clear_all(bitmap)) - { - end= strmov(buff, "OFF"); - } - else if (bitmap_is_set_all(bitmap)) - { - end= strmov(buff, "ALL"); - } - else - { - /* 10 is enough assuming errors are max 4 digits */ - int i; - for (i= 1; - i < MAX_SLAVE_ERROR && (uint) (end-buff) < sizeof(buff)-10; - i++) - { - if (bitmap_is_set(bitmap, i)) - { - end= int10_to_str(i, (char*) end, 10); - *(char*) end++= ','; - } - } - if (end != buff) - end--; // Remove last ',' - if (i < MAX_SLAVE_ERROR) - end= strmov((char*) end, "..."); // Couldn't show all errors - } - break; - } -#endif /* HAVE_REPLICATION */ - case SHOW_OPENTABLES: - end= int10_to_str((long) cached_tables(), buff, 10); - break; case SHOW_CHAR_PTR: { if (!(pos= *(char**) value)) @@ -1492,203 +1909,19 @@ static bool show_status_array(THD *thd, const char *wild, end= strend(pos); break; } - case SHOW_DOUBLE_STATUS: - { - value= ((char *) status_var + (ulong) value); - end= buff + sprintf(buff, "%f", *(double*) value); - break; - } -#ifdef HAVE_OPENSSL - /* First group - functions relying on CTX */ - case SHOW_SSL_CTX_SESS_ACCEPT: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_accept(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_ACCEPT_GOOD: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_accept_good(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_CONNECT_GOOD: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_connect_good(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_ACCEPT_RENEGOTIATE: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_accept_renegotiate(ssl_acceptor_fd->ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_CONNECT_RENEGOTIATE: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_connect_renegotiate(ssl_acceptor_fd-> ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_CB_HITS: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_cb_hits(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_HITS: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_hits(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_CACHE_FULL: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_cache_full(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_MISSES: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_misses(ssl_acceptor_fd-> - ssl_context)), - buff, 10); - break; - case SHOW_SSL_CTX_SESS_TIMEOUTS: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_timeouts(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_SESS_NUMBER: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_number(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_SESS_CONNECT: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_sess_connect(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_SESS_GET_CACHE_SIZE: - end= int10_to_str((long) (!ssl_acceptor_fd ? 
0 : - SSL_CTX_sess_get_cache_size(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_GET_VERIFY_MODE: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_get_verify_mode(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_GET_VERIFY_DEPTH: - end= int10_to_str((long) (!ssl_acceptor_fd ? 0 : - SSL_CTX_get_verify_depth(ssl_acceptor_fd->ssl_context)), - buff,10); - break; - case SHOW_SSL_CTX_GET_SESSION_CACHE_MODE: - if (!ssl_acceptor_fd) - { - pos= "NONE"; - end= pos+4; - break; - } - switch (SSL_CTX_get_session_cache_mode(ssl_acceptor_fd->ssl_context)) - { - case SSL_SESS_CACHE_OFF: - pos= "OFF"; - break; - case SSL_SESS_CACHE_CLIENT: - pos= "CLIENT"; - break; - case SSL_SESS_CACHE_SERVER: - pos= "SERVER"; - break; - case SSL_SESS_CACHE_BOTH: - pos= "BOTH"; - break; - case SSL_SESS_CACHE_NO_AUTO_CLEAR: - pos= "NO_AUTO_CLEAR"; - break; - case SSL_SESS_CACHE_NO_INTERNAL_LOOKUP: - pos= "NO_INTERNAL_LOOKUP"; - break; - default: - pos= "Unknown"; - break; - } - end= strend(pos); - break; - /* First group - functions relying on SSL */ - case SHOW_SSL_GET_VERSION: - pos= (thd->net.vio->ssl_arg ? - SSL_get_version((SSL*) thd->net.vio->ssl_arg) : ""); - end= strend(pos); - break; - case SHOW_SSL_SESSION_REUSED: - end= int10_to_str((long) (thd->net.vio->ssl_arg ? - SSL_session_reused((SSL*) thd->net.vio-> - ssl_arg) : - 0), - buff, 10); - break; - case SHOW_SSL_GET_DEFAULT_TIMEOUT: - end= int10_to_str((long) (thd->net.vio->ssl_arg ? - SSL_get_default_timeout((SSL*) thd->net.vio-> - ssl_arg) : - 0), - buff, 10); - break; - case SHOW_SSL_GET_VERIFY_MODE: - end= int10_to_str((long) (thd->net.vio->ssl_arg ? - SSL_get_verify_mode((SSL*) thd->net.vio-> - ssl_arg): - 0), - buff, 10); - break; - case SHOW_SSL_GET_VERIFY_DEPTH: - end= int10_to_str((long) (thd->net.vio->ssl_arg ? - SSL_get_verify_depth((SSL*) thd->net.vio-> - ssl_arg): - 0), - buff, 10); - break; - case SHOW_SSL_GET_CIPHER: - pos= (thd->net.vio->ssl_arg ? - SSL_get_cipher((SSL*) thd->net.vio->ssl_arg) : "" ); - end= strend(pos); - break; - case SHOW_SSL_GET_CIPHER_LIST: - if (thd->net.vio->ssl_arg) - { - char *to= buff; - for (int i=0 ; i++ ;) - { - const char *p= SSL_get_cipher_list((SSL*) thd->net.vio->ssl_arg,i); - if (p == NULL) - break; - to= strmov(to, p); - *to++= ':'; - } - if (to != buff) - to--; // Remove last ':' - end= to; - } - break; - -#endif /* HAVE_OPENSSL */ case SHOW_KEY_CACHE_LONG: - case SHOW_KEY_CACHE_CONST_LONG: - value= (value-(char*) &dflt_key_cache_var)+ (char*) dflt_key_cache; + value= (char*) dflt_key_cache + (ulong)value; end= int10_to_str(*(long*) value, buff, 10); break; case SHOW_KEY_CACHE_LONGLONG: - value= (value-(char*) &dflt_key_cache_var)+ (char*) dflt_key_cache; + value= (char*) dflt_key_cache + (ulong)value; end= longlong10_to_str(*(longlong*) value, buff, 10); break; - case SHOW_NET_COMPRESSION: - end= strmov(buff, thd->net.compress ? 
"ON" : "OFF"); - break; - case SHOW_UNDEF: // Show never happen - case SHOW_SYS: - break; // Return empty string + case SHOW_UNDEF: + break; // Return empty string + case SHOW_SYS: // Cannot happen default: + DBUG_ASSERT(0); break; } restore_record(table, s->default_values); @@ -1769,10 +2002,10 @@ typedef struct st_index_field_values 1 error */ -static bool schema_table_store_record(THD *thd, TABLE *table) +bool schema_table_store_record(THD *thd, TABLE *table) { int error; - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { if (create_myisam_from_heap(thd, table, table->pos_in_table_list->schema_table_param, @@ -1793,6 +2026,7 @@ void get_index_field_values(LEX *lex, INDEX_FIELD_VALUES *index_field_values) case SQLCOM_SHOW_TABLES: case SQLCOM_SHOW_TABLE_STATUS: case SQLCOM_SHOW_TRIGGERS: + case SQLCOM_SHOW_EVENTS: index_field_values->db_value= lex->select_lex.db; index_field_values->table_value= wild; break; @@ -2047,7 +2281,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) Security_context *sctx= thd->security_ctx; uint derived_tables= lex->derived_tables; int error= 1; - db_type not_used; + enum legacy_db_type not_used; Open_tables_state open_tables_state_backup; bool save_view_prepare_mode= lex->view_prepare_mode; lex->view_prepare_mode= TRUE; @@ -2140,8 +2374,8 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) } else { - strxmov(path, mysql_data_home, "/", base_name, NullS); - end= path + (len= unpack_dirname(path,path)); + len= build_table_filename(path, sizeof(path), base_name, "", ""); + end= path + len; len= FN_LEN - len; if (mysql_find_files(thd, &files, base_name, path, idx_field_vals.table_value, 0)) @@ -2289,8 +2523,7 @@ int fill_schema_shemata(THD *thd, TABLE_LIST *tables, COND *cond) (grant_option && !check_grant_db(thd, file_name))) #endif { - strxmov(path, mysql_data_home, "/", file_name, NullS); - length=unpack_dirname(path,path); // Convert if not unix + length= build_table_filename(path, sizeof(path), file_name, "", ""); found_libchar= 0; if (length && path[length-1] == FN_LIBCHAR) { @@ -2470,15 +2703,6 @@ static int get_schema_tables_record(THD *thd, struct st_table_list *tables, ptr=strxmov(ptr, " row_format=", ha_row_type[(uint) share->row_type], NullS); - if (file->raid_type) - { - char buff[100]; - my_snprintf(buff,sizeof(buff), - " raid_type=%s raid_chunks=%d raid_chunksize=%ld", - my_raid_type(file->raid_type), file->raid_chunks, - file->raid_chunksize/RAID_BLOCK_SIZE); - ptr=strmov(ptr,buff); - } table->field[19]->store(option_buff+1, (ptr == option_buff ? 0 : (uint) (ptr-option_buff)-1), cs); @@ -2728,6 +2952,7 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) CHARSET_INFO *tmp_cs= cs[0]; if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) && (tmp_cs->state & MY_CS_AVAILABLE) && + !(tmp_cs->state & MY_CS_HIDDEN) && !(wild && wild[0] && wild_case_compare(scs, tmp_cs->csname,wild))) { @@ -2746,6 +2971,46 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) } +int fill_schema_engines(THD *thd, TABLE_LIST *tables, COND *cond) +{ + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : NullS; + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + handlerton **types; + + DBUG_ENTER("fill_schema_engines"); + + for (types= sys_table_types; *types; types++) + { + if ((*types)->flags & HTON_HIDDEN) + continue; + + if (!(wild && wild[0] && + wild_case_compare(scs, (*types)->name,wild))) + { + const char *tmp; + restore_record(table, s->default_values); + + table->field[0]->store((*types)->name, strlen((*types)->name), scs); + tmp= (*types)->state ? "DISABLED" : "ENABLED"; + table->field[1]->store( tmp, strlen(tmp), scs); + table->field[2]->store((*types)->comment, strlen((*types)->comment), scs); + tmp= (*types)->commit ? "YES" : "NO"; + table->field[3]->store( tmp, strlen(tmp), scs); + tmp= (*types)->prepare ? "YES" : "NO"; + table->field[4]->store( tmp, strlen(tmp), scs); + tmp= (*types)->savepoint_set ? "YES" : "NO"; + table->field[5]->store( tmp, strlen(tmp), scs); + + if (schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + + int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) { CHARSET_INFO **cs; @@ -2757,6 +3022,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) CHARSET_INFO **cl; CHARSET_INFO *tmp_cs= cs[0]; if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || + (tmp_cs->state & MY_CS_HIDDEN) || !(tmp_cs->state & MY_CS_PRIMARY)) continue; for (cl= all_charsets; cl < all_charsets+255 ;cl ++) @@ -2912,7 +3178,7 @@ int fill_schema_proc(THD *thd, TABLE_LIST *tables, COND *cond) { DBUG_RETURN(1); } - proc_table->file->ha_index_init(0); + proc_table->file->ha_index_init(0, 1); if ((res= proc_table->file->index_first(proc_table->record[0]))) { res= (res == HA_ERR_END_OF_FILE) ? 0 : 1; @@ -3347,6 +3613,532 @@ static int get_schema_key_column_usage_record(THD *thd, } +static void collect_partition_expr(List<char> &field_list, String *str) +{ + List_iterator<char> part_it(field_list); + ulong no_fields= field_list.elements; + const char *field_str; + str->length(0); + while ((field_str= part_it++)) + { + str->append(field_str); + if (--no_fields != 0) + str->append(","); + } + return; +} + + +static void store_schema_partitions_record(THD *thd, TABLE *table, + partition_element *part_elem, + handler *file, uint part_id) +{ + CHARSET_INFO *cs= system_charset_info; + PARTITION_INFO stat_info; + TIME time; + file->get_dynamic_partition_info(&stat_info, part_id); + table->field[12]->store((longlong) stat_info.records, TRUE); + table->field[13]->store((longlong) stat_info.mean_rec_length, TRUE); + table->field[14]->store((longlong) stat_info.data_file_length, TRUE); + if (stat_info.max_data_file_length) + { + table->field[15]->store((longlong) stat_info.max_data_file_length, TRUE); + table->field[15]->set_notnull(); + } + table->field[16]->store((longlong) stat_info.index_file_length, TRUE); + table->field[17]->store((longlong) stat_info.delete_length, TRUE); + if (stat_info.create_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + stat_info.create_time); + table->field[18]->store_time(&time, MYSQL_TIMESTAMP_DATETIME); + table->field[18]->set_notnull(); + } + if (stat_info.update_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, + stat_info.update_time); + table->field[19]->store_time(&time, MYSQL_TIMESTAMP_DATETIME); + table->field[19]->set_notnull(); + } + if (stat_info.check_time) + { + thd->variables.time_zone->gmt_sec_to_TIME(&time, stat_info.check_time); + table->field[20]->store_time(&time, MYSQL_TIMESTAMP_DATETIME); + 
table->field[20]->set_notnull(); + } + if (file->table_flags() & (ulong) HA_HAS_CHECKSUM) + { + table->field[21]->store((longlong) stat_info.check_sum, TRUE); + table->field[21]->set_notnull(); + } + if (part_elem) + { + if (part_elem->part_comment) + table->field[22]->store(part_elem->part_comment, + strlen(part_elem->part_comment), cs); + else + table->field[22]->store(STRING_WITH_LEN("default"), cs); + if (part_elem->nodegroup_id != UNDEF_NODEGROUP) + table->field[23]->store((longlong) part_elem->nodegroup_id, TRUE); + else + table->field[23]->store(STRING_WITH_LEN("default"), cs); + if (part_elem->tablespace_name) + table->field[24]->store(part_elem->tablespace_name, + strlen(part_elem->tablespace_name), cs); + else + table->field[24]->store(STRING_WITH_LEN("default"), cs); + } + return; +} + + +static int get_schema_partitions_record(THD *thd, struct st_table_list *tables, + TABLE *table, bool res, + const char *base_name, + const char *file_name) +{ + CHARSET_INFO *cs= system_charset_info; + char buff[61]; + String tmp_res(buff, sizeof(buff), cs); + String tmp_str; + TIME time; + TABLE *show_table= tables->table; + handler *file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; +#endif + DBUG_ENTER("get_schema_partitions_record"); + + if (res) + { + if (!tables->view) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + thd->net.last_errno, thd->net.last_error); + thd->clear_error(); + DBUG_RETURN(0); + } + file= show_table->file; +#ifdef WITH_PARTITION_STORAGE_ENGINE + part_info= show_table->part_info; + if (part_info) + { + partition_element *part_elem; + List_iterator<partition_element> part_it(part_info->partitions); + uint part_pos= 0, part_id= 0; + uint no_parts= part_info->no_parts; + handler *part_file; + + restore_record(table, s->default_values); + table->field[1]->store(base_name, strlen(base_name), cs); + table->field[2]->store(file_name, strlen(file_name), cs); + + + /* Partition method*/ + switch (part_info->part_type) { + case RANGE_PARTITION: + table->field[7]->store(partition_keywords[PKW_RANGE].str, + partition_keywords[PKW_RANGE].length, cs); + break; + case LIST_PARTITION: + table->field[7]->store(partition_keywords[PKW_LIST].str, + partition_keywords[PKW_LIST].length, cs); + break; + case HASH_PARTITION: + tmp_res.length(0); + if (part_info->linear_hash_ind) + tmp_res.append(partition_keywords[PKW_LINEAR].str, + partition_keywords[PKW_LINEAR].length); + if (part_info->list_of_part_fields) + tmp_res.append(partition_keywords[PKW_KEY].str, + partition_keywords[PKW_KEY].length); + else + tmp_res.append(partition_keywords[PKW_HASH].str, + partition_keywords[PKW_HASH].length); + table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs); + break; + default: + DBUG_ASSERT(0); + current_thd->fatal_error(); + DBUG_RETURN(1); + } + table->field[7]->set_notnull(); + + /* Partition expression */ + if (part_info->part_expr) + { + table->field[9]->store(part_info->part_func_string, + part_info->part_func_len, cs); + table->field[9]->set_notnull(); + } + else if (part_info->list_of_part_fields) + { + collect_partition_expr(part_info->part_field_list, &tmp_str); + table->field[9]->store(tmp_str.ptr(), tmp_str.length(), cs); + table->field[9]->set_notnull(); + } + + if (part_info->is_sub_partitioned()) + { + /* Subpartition method */ + if (part_info->list_of_subpart_fields) + table->field[8]->store(partition_keywords[PKW_KEY].str, + partition_keywords[PKW_KEY].length, cs); + else + table->field[8]->store(partition_keywords[PKW_HASH].str, + 
partition_keywords[PKW_HASH].length, cs); + table->field[8]->set_notnull(); + + /* Subpartition expression */ + if (part_info->subpart_expr) + { + table->field[10]->store(part_info->subpart_func_string, + part_info->subpart_func_len, cs); + table->field[10]->set_notnull(); + } + else if (part_info->list_of_subpart_fields) + { + collect_partition_expr(part_info->subpart_field_list, &tmp_str); + table->field[10]->store(tmp_str.ptr(), tmp_str.length(), cs); + table->field[10]->set_notnull(); + } + } + + while ((part_elem= part_it++)) + { + + + table->field[3]->store(part_elem->partition_name, + strlen(part_elem->partition_name), cs); + table->field[3]->set_notnull(); + /* PARTITION_ORDINAL_POSITION */ + table->field[5]->store((longlong) ++part_pos, TRUE); + table->field[5]->set_notnull(); + + /* Partition description */ + if (part_info->part_type == RANGE_PARTITION) + { + if (part_elem->range_value != LONGLONG_MAX) + table->field[11]->store((longlong) part_elem->range_value, FALSE); + else + table->field[11]->store(partition_keywords[PKW_MAXVALUE].str, + partition_keywords[PKW_MAXVALUE].length, cs); + table->field[11]->set_notnull(); + } + else if (part_info->part_type == LIST_PARTITION) + { + List_iterator<longlong> list_val_it(part_elem->list_val_list); + longlong *list_value; + uint no_items= part_elem->list_val_list.elements; + tmp_str.length(0); + tmp_res.length(0); + while ((list_value= list_val_it++)) + { + tmp_res.set(*list_value, cs); + tmp_str.append(tmp_res); + if (--no_items != 0) + tmp_str.append(","); + }; + table->field[11]->store(tmp_str.ptr(), tmp_str.length(), cs); + table->field[11]->set_notnull(); + } + + if (part_elem->subpartitions.elements) + { + List_iterator<partition_element> sub_it(part_elem->subpartitions); + partition_element *subpart_elem; + uint subpart_pos= 0; + + while ((subpart_elem= sub_it++)) + { + table->field[4]->store(subpart_elem->partition_name, + strlen(subpart_elem->partition_name), cs); + table->field[4]->set_notnull(); + /* SUBPARTITION_ORDINAL_POSITION */ + table->field[6]->store((longlong) ++subpart_pos, TRUE); + table->field[6]->set_notnull(); + + store_schema_partitions_record(thd, table, subpart_elem, + file, part_id); + part_id++; + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + else + { + store_schema_partitions_record(thd, table, part_elem, + file, part_id); + part_id++; + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); + } + else +#endif + { + store_schema_partitions_record(thd, table, 0, file, 0); + if(schema_table_store_record(thd, table)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +static interval_type get_real_interval_type(interval_type i_type) +{ + switch (i_type) { + case INTERVAL_YEAR: + return INTERVAL_YEAR; + + case INTERVAL_QUARTER: + case INTERVAL_YEAR_MONTH: + case INTERVAL_MONTH: + return INTERVAL_MONTH; + + case INTERVAL_WEEK: + case INTERVAL_DAY: + return INTERVAL_DAY; + + case INTERVAL_DAY_HOUR: + case INTERVAL_HOUR: + return INTERVAL_HOUR; + + case INTERVAL_DAY_MINUTE: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_MINUTE: + return INTERVAL_MINUTE; + + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return INTERVAL_SECOND; + + case INTERVAL_DAY_MICROSECOND: + case INTERVAL_HOUR_MICROSECOND: + case INTERVAL_MINUTE_MICROSECOND: + case INTERVAL_SECOND_MICROSECOND: + case INTERVAL_MICROSECOND: + return INTERVAL_MICROSECOND; + } + DBUG_ASSERT(0); + return INTERVAL_SECOND; +} + +extern LEX_STRING 
interval_type_to_name[]; + +static int +fill_events_copy_to_schema_table(THD *thd, TABLE *sch_table, TABLE *event_table) +{ + const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS; + CHARSET_INFO *scs= system_charset_info; + TIME time; + Event_timed et; + DBUG_ENTER("fill_events_copy_to_schema_tab"); + + restore_record(sch_table, s->default_values); + + if (et.load_from_row(thd->mem_root, event_table)) + { + my_error(ER_CANNOT_LOAD_FROM_TABLE, MYF(0)); + DBUG_RETURN(1); + } + + if (!(!wild || !wild[0] || !wild_compare(et.name.str, wild, 0))) + DBUG_RETURN(0); + + //->field[0] is EVENT_CATALOG and is by default NULL + + sch_table->field[1]->store(et.dbname.str, et.dbname.length, scs); + sch_table->field[2]->store(et.name.str, et.name.length, scs); + sch_table->field[3]->store(et.definer.str, et.definer.length, scs); + sch_table->field[4]->store(et.body.str, et.body.length, scs); + + /* [9] is SQL_MODE */ + { + byte *sql_mode_str; + ulong sql_mode_len=0; + sql_mode_str= + sys_var_thd_sql_mode::symbolic_mode_representation(thd, et.sql_mode, + &sql_mode_len); + sch_table->field[9]->store((const char*)sql_mode_str, sql_mode_len, scs); + } + + if (et.expression) + { + String show_str; + //type + sch_table->field[5]->store(STRING_WITH_LEN("RECURRING"), scs); + /* execute_at */ + sch_table->field[6]->set_null(); + /* interval_value */ + //interval_type + if (event_reconstruct_interval_expression(&show_str, et.interval, + et.expression)) + DBUG_RETURN(1); + + sch_table->field[7]->set_notnull(); + sch_table->field[7]->store(show_str.c_ptr(), show_str.length(), scs); + + LEX_STRING *ival= &interval_type_to_name[et.interval]; + sch_table->field[8]->set_notnull(); + sch_table->field[8]->store(ival->str, ival->length, scs); + + //starts & ends + sch_table->field[10]->set_notnull(); + sch_table->field[10]->store_time(&et.starts, MYSQL_TIMESTAMP_DATETIME); + + if (!et.ends_null) + { + sch_table->field[11]->set_notnull(); + sch_table->field[11]->store_time(&et.ends, MYSQL_TIMESTAMP_DATETIME); + } + } + else + { + //type + sch_table->field[5]->store(STRING_WITH_LEN("ONE TIME"), scs); + + sch_table->field[6]->set_notnull(); + sch_table->field[6]->store_time(&et.execute_at, MYSQL_TIMESTAMP_DATETIME); + } + + //status + if (et.status == MYSQL_EVENT_ENABLED) + sch_table->field[12]->store(STRING_WITH_LEN("ENABLED"), scs); + else + sch_table->field[12]->store(STRING_WITH_LEN("DISABLED"), scs); + + //on_completion + if (et.on_completion == MYSQL_EVENT_ON_COMPLETION_DROP) + sch_table->field[13]->store(STRING_WITH_LEN("NOT PRESERVE"), scs); + else + sch_table->field[13]->store(STRING_WITH_LEN("PRESERVE"), scs); + + int not_used=0; + number_to_datetime(et.created, &time, 0, ¬_used); + DBUG_ASSERT(not_used==0); + sch_table->field[14]->store_time(&time, MYSQL_TIMESTAMP_DATETIME); + + number_to_datetime(et.modified, &time, 0, ¬_used); + DBUG_ASSERT(not_used==0); + sch_table->field[15]->store_time(&time, MYSQL_TIMESTAMP_DATETIME); + + if (et.last_executed.year) + sch_table->field[16]->store_time(&et.last_executed,MYSQL_TIMESTAMP_DATETIME); + else + sch_table->field[16]->set_null(); + + sch_table->field[17]->store(et.comment.str, et.comment.length, scs); + + if (schema_table_store_record(thd, sch_table)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +int fill_schema_events(THD *thd, TABLE_LIST *tables, COND *cond) +{ + TABLE *table= tables->table; + CHARSET_INFO *scs= system_charset_info; + TABLE *event_table= NULL; + Open_tables_state backup; + int ret=0; + bool verbose= false; + char 
definer[HOSTNAME_LENGTH+USERNAME_LENGTH+2]; + bool use_prefix_scanning= true; + uint key_len= 0; + byte *key_buf= NULL; + LINT_INIT(key_buf); + + DBUG_ENTER("fill_schema_events"); + + strxmov(definer, thd->security_ctx->priv_user,"@",thd->security_ctx->priv_host, + NullS); + + DBUG_PRINT("info",("db=%s current_user=%s", thd->lex->select_lex.db, definer)); + + thd->reset_n_backup_open_tables_state(&backup); + + if ((ret= evex_open_event_table(thd, TL_READ, &event_table))) + { + sql_print_error("Table mysql.event is damaged."); + ret= 1; + goto err; + } + + event_table->file->ha_index_init(0, 1); + + /* + see others' events only if you have PROCESS_ACL !! + thd->lex->verbose is set either if SHOW FULL EVENTS or + in case of SELECT FROM I_S.EVENTS + */ + verbose= (thd->lex->verbose + && (thd->security_ctx->master_access & PROCESS_ACL)); + + if (verbose && thd->security_ctx->user) + { + ret= event_table->file->index_first(event_table->record[0]); + use_prefix_scanning= false; + } + else + { + event_table->field[EVEX_FIELD_DEFINER]->store(definer, strlen(definer), scs); + key_len= event_table->key_info->key_part[0].store_length; + + if (thd->lex->select_lex.db) + { + event_table->field[EVEX_FIELD_DB]-> + store(thd->lex->select_lex.db, strlen(thd->lex->select_lex.db), scs); + key_len+= event_table->key_info->key_part[1].store_length; + } + if (!(key_buf= (byte *)alloc_root(thd->mem_root, key_len))) + { + ret= 1; + goto err; + } + + key_copy(key_buf, event_table->record[0], event_table->key_info, key_len); + ret= event_table->file->index_read(event_table->record[0], key_buf, key_len, + HA_READ_PREFIX); + } + + if (ret) + { + ret= (ret == HA_ERR_END_OF_FILE || ret == HA_ERR_KEY_NOT_FOUND) ? 0 : 1; + goto err; + } + + while (!ret) + { + if ((ret= fill_events_copy_to_schema_table(thd, table, event_table))) + goto err; + + if (use_prefix_scanning) + ret= event_table->file-> + index_next_same(event_table->record[0], key_buf, key_len); + else + ret= event_table->file->index_next(event_table->record[0]); + } + // ret is guaranteed to be != 0 + ret= (ret != HA_ERR_END_OF_FILE); +err: + if (event_table) + { + event_table->file->ha_index_end(); + close_thread_tables(thd); + } + + thd->restore_backup_open_tables_state(&backup); + DBUG_RETURN(ret); +} + + int fill_open_tables(THD *thd, TABLE_LIST *tables, COND *cond) { DBUG_ENTER("fill_open_tables"); @@ -3379,7 +4171,7 @@ int fill_variables(THD *thd, TABLE_LIST *tables, COND *cond) LEX *lex= thd->lex; const char *wild= lex->wild ? lex->wild->ptr() : NullS; pthread_mutex_lock(&LOCK_global_system_variables); - res= show_status_array(thd, wild, init_vars, + res= show_status_array(thd, wild, init_vars, lex->option_type, 0, "", tables->table); pthread_mutex_unlock(&LOCK_global_system_variables); DBUG_RETURN(res); @@ -3393,12 +4185,13 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond) const char *wild= lex->wild ? lex->wild->ptr() : NullS; int res= 0; STATUS_VAR tmp; - ha_update_statistics(); /* Export engines statistics */ pthread_mutex_lock(&LOCK_status); if (lex->option_type == OPT_GLOBAL) calc_sum_of_all_status(&tmp); - res= show_status_array(thd, wild, status_vars, OPT_GLOBAL, - (lex->option_type == OPT_GLOBAL ? + res= show_status_array(thd, wild, + (SHOW_VAR *)all_status_vars.buffer, + OPT_GLOBAL, + (lex->option_type == OPT_GLOBAL ? 
&tmp: &thd->status_var), "",tables->table); pthread_mutex_unlock(&LOCK_status); DBUG_RETURN(res); @@ -3730,8 +4523,8 @@ int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list) table->alias_name_used= my_strcasecmp(table_alias_charset, table_list->schema_table_name, table_list->alias); - table_list->table_name= (char*) table->s->table_name; - table_list->table_name_length= strlen(table->s->table_name); + table_list->table_name= table->s->table_name.str; + table_list->table_name_length= table->s->table_name.length; table_list->table= table; table->next= thd->derived_tables; thd->derived_tables= table; @@ -3800,6 +4593,7 @@ int make_schema_select(THD *thd, SELECT_LEX *sel, ST_SCHEMA_TABLE *schema_table= get_schema_table(schema_table_idx); LEX_STRING db, table; DBUG_ENTER("mysql_schema_select"); + DBUG_PRINT("enter", ("mysql_schema_select: %s", schema_table->table_name)); /* We have to make non const db_name & table_name because of lower_case_table_names @@ -3877,6 +4671,38 @@ bool get_schema_tables_result(JOIN *join) DBUG_RETURN(result); } +struct run_hton_fill_schema_files_args +{ + TABLE_LIST *tables; + COND *cond; +}; + +static my_bool run_hton_fill_schema_files(THD *thd, st_plugin_int *plugin, + void *arg) +{ + struct run_hton_fill_schema_files_args *args= + (run_hton_fill_schema_files_args *) arg; + handlerton *hton= (handlerton *) plugin->plugin->info; + if(hton->fill_files_table) + hton->fill_files_table(thd, args->tables, args->cond); + return false; +} + +int fill_schema_files(THD *thd, TABLE_LIST *tables, COND *cond) +{ + int i; + TABLE *table= tables->table; + DBUG_ENTER("fill_schema_files"); + + struct run_hton_fill_schema_files_args args; + args.tables= tables; + args.cond= cond; + + plugin_foreach(thd, run_hton_fill_schema_files, + MYSQL_STORAGE_ENGINE_PLUGIN, &args); + + DBUG_RETURN(0); +} ST_FIELD_INFO schema_fields_info[]= { @@ -3963,6 +4789,43 @@ ST_FIELD_INFO collation_fields_info[]= }; +ST_FIELD_INFO engines_fields_info[]= +{ + {"ENGINE", 64, MYSQL_TYPE_STRING, 0, 0, "Engine"}, + {"SUPPORT", 8, MYSQL_TYPE_STRING, 0, 0, "Support"}, + {"COMMENT", 80, MYSQL_TYPE_STRING, 0, 0, "Comment"}, + {"TRANSACTIONS", 3, MYSQL_TYPE_STRING, 0, 0, "Transactions"}, + {"XA", 3, MYSQL_TYPE_STRING, 0, 0, "XA"}, + {"SAVEPOINTS", 3 ,MYSQL_TYPE_STRING, 0, 0, "Savepoints"}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + + +ST_FIELD_INFO events_fields_info[]= +{ + {"EVENT_CATALOG", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"EVENT_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Db"}, + {"EVENT_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Name"}, + {"DEFINER", 77, MYSQL_TYPE_STRING, 0, 0, "Definer"}, + {"EVENT_BODY", 65535, MYSQL_TYPE_STRING, 0, 0, 0}, + {"EVENT_TYPE", 9, MYSQL_TYPE_STRING, 0, 0, "Type"}, + {"EXECUTE_AT", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Execute at"}, + {"INTERVAL_VALUE", 256, MYSQL_TYPE_STRING, 0, 1, "Interval value"}, + {"INTERVAL_FIELD", 18, MYSQL_TYPE_STRING, 0, 1, "Interval field"}, + {"SQL_MODE", 65535, MYSQL_TYPE_STRING, 0, 0, 0}, + {"STARTS", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Starts"}, + {"ENDS", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Ends"}, + {"STATUS", 8, MYSQL_TYPE_STRING, 0, 0, "Status"}, + {"ON_COMPLETION", 12, MYSQL_TYPE_STRING, 0, 0, 0}, + {"CREATED", 0, MYSQL_TYPE_TIMESTAMP, 0, 0, 0}, + {"LAST_ALTERED", 0, MYSQL_TYPE_TIMESTAMP, 0, 0, 0}, + {"LAST_EXECUTED", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, 0}, + {"EVENT_COMMENT", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + + + ST_FIELD_INFO coll_charset_app_fields_info[]= { {"COLLATION_NAME", 64, 
MYSQL_TYPE_STRING, 0, 0, 0}, @@ -4153,6 +5016,37 @@ ST_FIELD_INFO triggers_fields_info[]= }; +ST_FIELD_INFO partitions_fields_info[]= +{ + {"TABLE_CATALOG", FN_REFLEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"TABLE_SCHEMA",NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"PARTITION_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"SUBPARTITION_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PARTITION_ORDINAL_POSITION", 21 , MYSQL_TYPE_LONG, 0, 1, 0}, + {"SUBPARTITION_ORDINAL_POSITION", 21 , MYSQL_TYPE_LONG, 0, 1, 0}, + {"PARTITION_METHOD", 12, MYSQL_TYPE_STRING, 0, 1, 0}, + {"SUBPARTITION_METHOD", 5, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PARTITION_EXPRESSION", 65535, MYSQL_TYPE_STRING, 0, 1, 0}, + {"SUBPARTITION_EXPRESSION", 65535, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PARTITION_DESCRIPTION", 65535, MYSQL_TYPE_STRING, 0, 1, 0}, + {"TABLE_ROWS", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"DATA_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, 0}, + {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"DATA_FREE", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"CREATE_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, 0}, + {"UPDATE_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, 0}, + {"CHECK_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, 0}, + {"CHECKSUM", 21 , MYSQL_TYPE_LONG, 0, 1, 0}, + {"PARTITION_COMMENT", 80, MYSQL_TYPE_STRING, 0, 0, 0}, + {"NODEGROUP", 21 , MYSQL_TYPE_LONG, 0, 0, 0}, + {"TABLESPACE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + + ST_FIELD_INFO variables_fields_info[]= { {"Variable_name", 80, MYSQL_TYPE_STRING, 0, 0, "Variable_name"}, @@ -4161,8 +5055,82 @@ ST_FIELD_INFO variables_fields_info[]= }; +ST_FIELD_INFO processlist_fields_info[]= +{ + {"ID", 4, MYSQL_TYPE_LONG, 0, 0, "Id"}, + {"USER", 16, MYSQL_TYPE_STRING, 0, 0, "User"}, + {"HOST", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Host"}, + {"DB", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Db"}, + {"COMMAND", 16, MYSQL_TYPE_STRING, 0, 0, "Command"}, + {"TIME", 4, MYSQL_TYPE_LONG, 0, 0, "Time"}, + {"STATE", 30, MYSQL_TYPE_STRING, 0, 1, "State"}, + {"INFO", PROCESS_LIST_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info"}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + + +ST_FIELD_INFO plugin_fields_info[]= +{ + {"PLUGIN_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Name"}, + {"PLUGIN_VERSION", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"PLUGIN_STATUS", 10, MYSQL_TYPE_STRING, 0, 0, "Status"}, + {"PLUGIN_TYPE", 80, MYSQL_TYPE_STRING, 0, 0, "Type"}, + {"PLUGIN_TYPE_VERSION", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"PLUGIN_LIBRARY", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, "Library"}, + {"PLUGIN_LIBRARY_VERSION", 20, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PLUGIN_AUTHOR", NAME_LEN, MYSQL_TYPE_STRING, 0, 1, 0}, + {"PLUGIN_DESCRIPTION", 65535, MYSQL_TYPE_STRING, 0, 1, 0}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + +ST_FIELD_INFO files_fields_info[]= +{ + {"FILE_ID", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"FILE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"FILE_TYPE", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"TABLESPACE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"TABLE_CATALOG", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"LOGFILE_GROUP_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"LOGFILE_GROUP_NUMBER", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"ENGINE", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 0}, + {"FULLTEXT_KEYS", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, 
0}, + {"DELETED_ROWS", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"UPDATE_COUNT", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"FREE_EXTENTS", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"TOTAL_EXTENTS", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"EXTENT_SIZE", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"INITIAL_SIZE", 8, MYSQL_TYPE_LONGLONG, 0, 0, 0}, + {"MAXIMUM_SIZE", 8, MYSQL_TYPE_LONGLONG, 0, 0, 0}, + {"AUTOEXTEND_SIZE", 8, MYSQL_TYPE_LONGLONG, 0, 0, 0}, + {"CREATION_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 0, 0}, + {"LAST_UPDATE_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 0, 0}, + {"LAST_ACCESS_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 0, 0}, + {"RECOVER_TIME", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"TRANSACTION_COUNTER", 4, MYSQL_TYPE_LONG, 0, 0, 0}, + {"VERSION", 21 , MYSQL_TYPE_LONG, 0, 1, "Version"}, + {"ROW_FORMAT", 10, MYSQL_TYPE_STRING, 0, 1, "Row_format"}, + {"TABLE_ROWS", 21 , MYSQL_TYPE_LONG, 0, 1, "Rows"}, + {"AVG_ROW_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, "Avg_row_length"}, + {"DATA_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, "Data_length"}, + {"MAX_DATA_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, "Max_data_length"}, + {"INDEX_LENGTH", 21 , MYSQL_TYPE_LONG, 0, 1, "Index_length"}, + {"DATA_FREE", 21 , MYSQL_TYPE_LONG, 0, 1, "Data_free"}, + {"CREATE_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Create_time"}, + {"UPDATE_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Update_time"}, + {"CHECK_TIME", 0, MYSQL_TYPE_TIMESTAMP, 0, 1, "Check_time"}, + {"CHECKSUM", 21 , MYSQL_TYPE_LONG, 0, 1, "Checksum"}, + {"STATUS", 20, MYSQL_TYPE_STRING, 0, 0, 0}, + {"EXTRA", 255, MYSQL_TYPE_STRING, 0, 0, 0}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0} +}; + /* Description of ST_FIELD_INFO in table.h + + Make sure that the order of schema_tables and enum_schema_tables are the same. + */ ST_SCHEMA_TABLE schema_tables[]= @@ -4177,10 +5145,22 @@ ST_SCHEMA_TABLE schema_tables[]= get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0}, {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, fill_schema_column_privileges, 0, 0, -1, -1, 0}, + {"ENGINES", engines_fields_info, create_schema_table, + fill_schema_engines, make_old_format, 0, -1, -1, 0}, + {"EVENTS", events_fields_info, create_schema_table, + fill_schema_events, make_old_format, 0, -1, -1, 0}, + {"FILES", files_fields_info, create_schema_table, + fill_schema_files, 0, 0, -1, -1, 0}, {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0}, {"OPEN_TABLES", open_tables_fields_info, create_schema_table, fill_open_tables, make_old_format, 0, -1, -1, 1}, + {"PARTITIONS", partitions_fields_info, create_schema_table, + get_all_tables, 0, get_schema_partitions_record, 1, 2, 0}, + {"PLUGINS", plugin_fields_info, create_schema_table, + fill_plugins, make_old_format, 0, -1, -1, 0}, + {"PROCESSLIST", processlist_fields_info, create_schema_table, + fill_schema_processlist, make_old_format, 0, -1, -1, 0}, {"ROUTINES", proc_fields_info, create_schema_table, fill_schema_proc, make_proc_old_format, 0, -1, -1, 0}, {"SCHEMATA", schema_fields_info, create_schema_table, diff --git a/sql/sql_show.h b/sql/sql_show.h new file mode 100644 index 00000000000..6fce5e94ca3 --- /dev/null +++ b/sql/sql_show.h @@ -0,0 +1,17 @@ + +#ifndef SQL_SHOW_H +#define SQL_SHOW_H + +/* Forward declarations */ +class String; +class THD; +struct st_ha_create_information; +struct st_table_list; +typedef st_ha_create_information HA_CREATE_INFO; +typedef st_table_list TABLE_LIST; + +int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, + HA_CREATE_INFO *create_info_arg); 
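+
+/*
+  Builds the SHOW CREATE VIEW text for 'table' into 'buff'; the view
+  counterpart of store_create_info() above.
+*/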
+int view_store_create_info(THD *thd, TABLE_LIST *table, String *buff); + +#endif /* SQL_SHOW_H */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 0bae714d7dc..757321b5ccf 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -17,19 +17,19 @@ /* drop and alter of tables */ #include "mysql_priv.h" -#ifdef HAVE_BERKELEY_DB -#include "ha_berkeley.h" -#endif #include <hash.h> #include <myisam.h> #include <my_dir.h> #include "sp_head.h" #include "sql_trigger.h" +#include "sql_show.h" #ifdef __WIN__ #include <io.h> #endif +int creating_table= 0; // How many mysql_create_table are running + const char *primary_key_name="PRIMARY"; static bool check_if_keyname_exists(const char *name,KEY *start, KEY *end); @@ -42,34 +42,371 @@ static int copy_data_between_tables(TABLE *from,TABLE *to, ha_rows *copied,ha_rows *deleted); static bool prepare_blob_field(THD *thd, create_field *sql_field); static bool check_engine(THD *thd, const char *table_name, - enum db_type *new_engine); + HA_CREATE_INFO *create_info); +static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, + List<create_field> *fields, + List<Key> *keys, bool tmp_table, + uint *db_options, + handler *file, KEY **key_info_buffer, + uint *key_count, int select_field_count); + +#define MYSQL50_TABLE_NAME_PREFIX "#mysql50#" +#define MYSQL50_TABLE_NAME_PREFIX_LENGTH 9 + +uint filename_to_tablename(const char *from, char *to, uint to_length) +{ + uint errors, res= strconvert(&my_charset_filename, from, + system_charset_info, to, to_length, &errors); + if (errors) // Old 5.0 name + { + res= strxnmov(to, to_length, MYSQL50_TABLE_NAME_PREFIX, from, NullS) - to; + sql_print_error("Invalid (old?) table or database name '%s'", from); + /* + TODO: add a stored procedure for fix table and database names, + and mention its name in error log. + */ + } + return res; +} + + +uint tablename_to_filename(const char *from, char *to, uint to_length) +{ + uint errors; + if (from[0] == '#' && !strncmp(from, MYSQL50_TABLE_NAME_PREFIX, + MYSQL50_TABLE_NAME_PREFIX_LENGTH)) + return my_snprintf(to, to_length, "%s", from + 9); + return strconvert(system_charset_info, from, + &my_charset_filename, to, to_length, &errors); +} /* - Build the path to a file for a table (or the base path that can - then have various extensions stuck on to it). + Creates path to a file: mysql_data_dir/db/table.ext SYNOPSIS - build_table_path() - buff Buffer to build the path into - bufflen sizeof(buff) - db Name of database - table Name of table - ext Filename extension + build_table_filename() + buff where to write result + bufflen buff size + db database name, in system_charset_info + table table name, in system_charset_info + ext file extension + + NOTES + + Uses database and table name, and extension to create + a file name in mysql_data_dir. Database and table + names are converted from system_charset_info into "fscs". + 'ext' is not converted. 
   RETURN
-    0   Error
-    #   Size of path
- */
-static uint build_table_path(char *buff, size_t bufflen, const char *db,
-                             const char *table, const char *ext)
+*/
+
+
+uint build_table_filename(char *buff, size_t bufflen, const char *db,
+                          const char *table, const char *ext)
+{
+  uint length;
+  char dbbuff[FN_REFLEN];
+  char tbbuff[FN_REFLEN];
+  VOID(tablename_to_filename(table, tbbuff, sizeof(tbbuff)));
+  VOID(tablename_to_filename(db, dbbuff, sizeof(dbbuff)));
+  strxnmov(buff, bufflen,
+           mysql_data_home, "/", dbbuff, "/", tbbuff, ext, NullS);
+  length= unpack_filename(buff, buff);
+  return length;
+}
+
+
+uint build_tmptable_filename(THD* thd, char *buff, size_t bufflen)
 {
-  strxnmov(buff, bufflen-1, mysql_data_home, "/", db, "/", table, ext,
-           NullS);
-  return unpack_filename(buff,buff);
+  uint length;
+  char tbbuff[FN_REFLEN];
+  char tmp_table_name[tmp_file_prefix_length+22+22+22+3];
+  my_snprintf(tmp_table_name, sizeof(tmp_table_name),
+              "%s%lx_%lx_%x",
+              tmp_file_prefix, current_pid,
+              thd->thread_id, thd->tmp_table++);
+  VOID(tablename_to_filename(tmp_table_name, tbbuff, sizeof(tbbuff)));
+  strxnmov(buff, bufflen, mysql_tmpdir, "/", tbbuff, reg_ext, NullS);
+  length= unpack_filename(buff, buff);
+  return length;
 }

+/*
+  Return values for compare_tables().
+  If you make compare_tables() non-static, move them to a header file.
+*/
+#define ALTER_TABLE_DATA_CHANGED  1
+#define ALTER_TABLE_INDEX_CHANGED 2
+
+
+/*
+  SYNOPSIS
+    mysql_copy_create_list()
+    orig_create_list          Original list of created fields
+    inout::new_create_list    Copy of original list
+
+  RETURN VALUES
+    FALSE                     Success
+    TRUE                      Memory allocation error
+
+  DESCRIPTION
+    mysql_prepare_table destroys the create_list and in some cases we need
+    this list for other purposes. Thus we copy it specifically for use
+    by mysql_prepare_table
+*/
+
+static int mysql_copy_create_list(List<create_field> *orig_create_list,
+                                  List<create_field> *new_create_list)
+{
+  List_iterator<create_field> prep_field_it(*orig_create_list);
+  create_field *prep_field;
+  DBUG_ENTER("mysql_copy_create_list");
+
+  while ((prep_field= prep_field_it++))
+  {
+    create_field *field= new create_field(*prep_field);
+    if (!field || new_create_list->push_back(field))
+    {
+      mem_alloc_error(2);
+      DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  SYNOPSIS
+    mysql_copy_key_list()
+    orig_key                  Original list of keys
+    inout::new_key            Copy of original list
+
+  RETURN VALUES
+    FALSE                     Success
+    TRUE                      Memory allocation error
+
+  DESCRIPTION
+    mysql_prepare_table destroys the key list and in some cases we need
+    this list for other purposes. Thus we copy it specifically for use
+    by mysql_prepare_table
+*/
+
+static int mysql_copy_key_list(List<Key> *orig_key,
+                               List<Key> *new_key)
+{
+  List_iterator<Key> prep_key_it(*orig_key);
+  Key *prep_key;
+  DBUG_ENTER("mysql_copy_key_list");
+
+  while ((prep_key= prep_key_it++))
+  {
+    List<key_part_spec> prep_columns;
+    List_iterator<key_part_spec> prep_col_it(prep_key->columns);
+    key_part_spec *prep_col;
+    Key *temp_key;
+
+    while ((prep_col= prep_col_it++))
+    {
+      key_part_spec *prep_key_part;
+
+      if (!(prep_key_part= new key_part_spec(*prep_col)))
+      {
+        mem_alloc_error(sizeof(key_part_spec));
+        DBUG_RETURN(TRUE);
+      }
+      if (prep_columns.push_back(prep_key_part))
+      {
+        mem_alloc_error(2);
+        DBUG_RETURN(TRUE);
+      }
+    }
+    if (!(temp_key= new Key(prep_key->type, prep_key->name,
+                            prep_key->algorithm,
+                            prep_key->generated,
+                            prep_columns,
+                            prep_key->parser_name)))
+    {
+      mem_alloc_error(sizeof(Key));
+      DBUG_RETURN(TRUE);
+    }
+    if (new_key->push_back(temp_key))
+    {
+      mem_alloc_error(2);
+      DBUG_RETURN(TRUE);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+
+/*
+  SYNOPSIS
+    mysql_write_frm()
+    lpt                       Struct carrying many parameters needed for
+                              this method
+    flags                     Flags as defined below
+      WFRM_INITIAL_WRITE        If set we need to prepare the table before
+                                creating the frm file
+      WFRM_CREATE_HANDLER_FILES If set we need to create the handler file as
+                                part of the creation of the frm file
+      WFRM_PACK_FRM             If set we should pack the frm file and then
+                                delete it
+
+  RETURN VALUES
+    TRUE                      Error
+    FALSE                     Success
+
+  DESCRIPTION
+    A support method that creates a new frm file and in this process
+    regenerates the partition data. It also works for non-partitioned
+    tables, since it only handles partitioned data if it exists.
+*/
+
+bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags)
+{
+  /*
+    Prepare the table for writing a new frm file, where the partitions
+    in add/drop state have temporarily changed their state.
+    We set tmp_table to avoid getting errors on the naming of the
+    primary key index.
+ */ + int error= 0; + char path[FN_REFLEN+1]; + char frm_name[FN_REFLEN+1]; + DBUG_ENTER("mysql_write_frm"); + + if (flags & WFRM_INITIAL_WRITE) + { + error= mysql_copy_create_list(lpt->create_list, + &lpt->new_create_list); + error+= mysql_copy_key_list(lpt->key_list, + &lpt->new_key_list); + if (error) + { + DBUG_RETURN(TRUE); + } + } + build_table_filename(path, sizeof(path), lpt->db, lpt->table_name, ""); + strxmov(frm_name, path, reg_ext, NullS); + if ((flags & WFRM_INITIAL_WRITE) && + (mysql_prepare_table(lpt->thd, lpt->create_info, &lpt->new_create_list, + &lpt->new_key_list,/*tmp_table*/ 1, &lpt->db_options, + lpt->table->file, &lpt->key_info_buffer, + &lpt->key_count, /*select_field_count*/ 0))) + { + DBUG_RETURN(TRUE); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + { + partition_info *part_info= lpt->table->part_info; + char *part_syntax_buf; + uint syntax_len, i; + bool any_unnormal_state= FALSE; + + if (part_info) + { + uint max_part_state_len= part_info->partitions.elements + + part_info->temp_partitions.elements; + if (!(part_info->part_state= (uchar*)sql_alloc(max_part_state_len))) + { + DBUG_RETURN(TRUE); + } + part_info->part_state_len= 0; + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE, FALSE))) + { + DBUG_RETURN(TRUE); + } + for (i= 0; i < part_info->part_state_len; i++) + { + enum partition_state part_state= + (enum partition_state)part_info->part_state[i]; + if (part_state != PART_NORMAL && part_state != PART_IS_ADDED) + any_unnormal_state= TRUE; + } + if (!any_unnormal_state) + { + part_info->part_state= NULL; + part_info->part_state_len= 0; + } + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + } + } +#endif + /* + We write the frm file with the LOCK_open mutex since otherwise we could + overwrite the frm file as another is reading it in open_table. + */ + lpt->create_info->table_options= lpt->db_options; + VOID(pthread_mutex_lock(&LOCK_open)); + if ((mysql_create_frm(lpt->thd, frm_name, lpt->db, lpt->table_name, + lpt->create_info, lpt->new_create_list, lpt->key_count, + lpt->key_info_buffer, lpt->table->file)) || + ((flags & WFRM_CREATE_HANDLER_FILES) && + lpt->table->file->create_handler_files(path))) + { + error= 1; + goto end; + } + if (flags & WFRM_PACK_FRM) + { + /* + We need to pack the frm file and after packing it we delete the + frm file to ensure it doesn't get used. This is only used for + handlers that have the main version of the frm file stored in the + handler. + */ + const void *data= 0; + uint length= 0; + if (readfrm(path, &data, &length) || + packfrm(data, length, &lpt->pack_frm_data, &lpt->pack_frm_len)) + { + my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*)lpt->pack_frm_data, MYF(MY_ALLOW_ZERO_PTR)); + mem_alloc_error(length); + error= 1; + goto end; + } + error= my_delete(frm_name, MYF(MY_WME)); + } + /* Frm file have been updated to reflect the change about to happen. 
*/ +end: + VOID(pthread_mutex_unlock(&LOCK_open)); + DBUG_RETURN(error); +} + + +/* + SYNOPSIS + write_bin_log() + thd Thread object + clear_error is clear_error to be called + query Query to log + query_length Length of query + + RETURN VALUES + NONE + + DESCRIPTION + Write the binlog if open, routine used in multiple places in this + file +*/ + +void write_bin_log(THD *thd, bool clear_error, + char const *query, ulong query_length) +{ + if (mysql_bin_log.is_open()) + { + if (clear_error) + thd->clear_error(); + thd->binlog_query(THD::STMT_QUERY_TYPE, + query, query_length, FALSE, FALSE); + } +} /* @@ -219,13 +556,37 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, bool dont_log_query) { TABLE_LIST *table; - char path[FN_REFLEN], *alias; + char path[FN_REFLEN], *alias; + uint path_length; String wrong_tables; int error; + int non_temp_tables_count= 0; bool some_tables_deleted=0, tmp_table_deleted=0, foreign_key_error=0; - + String built_query; DBUG_ENTER("mysql_rm_table_part2"); + if (thd->current_stmt_binlog_row_based && !dont_log_query) + { + built_query.set_charset(system_charset_info); + if (if_exists) + built_query.append("DROP TABLE IF EXISTS "); + else + built_query.append("DROP TABLE "); + } + /* + If we have the table in the definition cache, we don't have to check the + .frm file to find if the table is a normal table (not view) and what + engine to use. + */ + + for (table= tables; table; table= table->next_local) + { + TABLE_SHARE *share; + table->db_type= NULL; + if ((share= get_cached_table_share(table->db, table->table_name))) + table->db_type= share->db_type; + } + if (lock_table_names(thd, tables)) DBUG_RETURN(1); @@ -235,16 +596,42 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, for (table= tables; table; table= table->next_local) { char *db=table->db; - db_type table_type= DB_TYPE_UNKNOWN; + handlerton *table_type; + enum legacy_db_type frm_db_type; mysql_ha_flush(thd, table, MYSQL_HA_CLOSE_FINAL, TRUE); - if (!close_temporary_table(thd, db, table->table_name)) + if (!close_temporary_table(thd, table)) { tmp_table_deleted=1; continue; // removed temporary table } + /* + If row-based replication is used and the table is not a + temporary table, we add the table name to the drop statement + being built. The string always end in a comma and the comma + will be chopped off before being written to the binary log. + */ + if (thd->current_stmt_binlog_row_based && !dont_log_query) + { + ++non_temp_tables_count; + /* + Don't write the database name if it is the current one (or if + thd->db is NULL). + */ + built_query.append("`"); + if (thd->db == NULL || strcmp(db,thd->db) != 0) + { + built_query.append(db); + built_query.append("`.`"); + } + + built_query.append(table->table_name); + built_query.append("`,"); + } + error=0; + table_type= table->db_type; if (!drop_temporary) { abort_locked_tables(thd, db, table->table_name); @@ -258,14 +645,16 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, DBUG_RETURN(-1); } alias= (lower_case_table_names == 2) ? 
table->alias : table->table_name;
-      /* remove form file and isam files */
-      build_table_path(path, sizeof(path), db, alias, reg_ext);
-    }
-    if (drop_temporary ||
-        (access(path,F_OK) &&
-         ha_create_table_from_engine(thd,db,alias)) ||
-        (!drop_view &&
-         mysql_frm_type(thd, path, &table_type) != FRMTYPE_TABLE))
+      /* remove .frm file and engine files */
+      path_length= build_table_filename(path, sizeof(path),
+                                        db, alias, reg_ext);
+    }
+    if (table_type == NULL &&
+        (drop_temporary ||
+         (access(path, F_OK) &&
+          ha_create_table_from_engine(thd, db, alias)) ||
+         (!drop_view &&
+          mysql_frm_type(thd, path, &frm_db_type) != FRMTYPE_TABLE)))
     {
       // Table was not found on disk and table can't be created from engine
       if (if_exists)
@@ -278,13 +667,17 @@
     else
     {
       char *end;
-      if (table_type == DB_TYPE_UNKNOWN)
-        mysql_frm_type(thd, path, &table_type);
-      *(end=fn_ext(path))=0;                    // Remove extension for delete
-      error= ha_delete_table(thd, table_type, path, table->table_name,
+      if (table_type == NULL)
+      {
+        mysql_frm_type(thd, path, &frm_db_type);
+        table_type= ha_resolve_by_legacy_type(thd, frm_db_type);
+      }
+      // Remove extension for delete
+      *(end= path + path_length - reg_ext_length)= '\0';
+      error= ha_delete_table(thd, table_type, path, db, table->table_name,
                              !dont_log_query);
       if ((error == ENOENT || error == HA_ERR_NO_SUCH_TABLE) &&
-          (if_exists || table_type == DB_TYPE_UNKNOWN))
+          (if_exists || table_type == NULL))
        error= 0;
       if (error == HA_ERR_ROW_IS_REFERENCED)
       {
@@ -327,12 +720,48 @@
   if (some_tables_deleted || tmp_table_deleted || !error)
   {
     query_cache_invalidate3(thd, tables, 0);
-    if (!dont_log_query && mysql_bin_log.is_open())
+    if (!dont_log_query)
     {
-      if (!error)
-        thd->clear_error();
-      Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
-      mysql_bin_log.write(&qinfo);
+      if (!thd->current_stmt_binlog_row_based ||
+          non_temp_tables_count > 0 && !tmp_table_deleted)
+      {
+        /*
+          Here we are either using statement-based replication, or using
+          row-based replication but have deleted only non-temporary
+          tables (and no temporary tables).  In both cases we can write
+          the original query into the binary log.
+        */
+        write_bin_log(thd, !error, thd->query, thd->query_length);
+      }
+      else if (thd->current_stmt_binlog_row_based &&
+               non_temp_tables_count > 0 &&
+               tmp_table_deleted)
+      {
+        /*
+          Here we have deleted both temporary and non-temporary tables, so:
+          - since we have deleted a non-temporary table we have to
+            binlog the statement, but
+          - since we have deleted a temporary table we cannot binlog
+            the statement (since the table has not been created on the
+            slave, this might cause the slave to stop).
+
+          Instead, we write a built statement, containing only the
+          non-temporary tables, to the binary log.
+        */
+        built_query.chop();                  // Chop off the last comma
+        built_query.append(" /* generated by server */");
+        write_bin_log(thd, !error, built_query.ptr(), built_query.length());
+      }
+      /*
+        The remaining cases are:
+        - no tables were deleted, or
+        - only temporary tables were deleted, and row-based
+          replication is used.
+        In both of these cases, nothing should be written to the
+        binary log.
+      */
+    }
   }
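The binlog branch added above is the subtle part of this hunk: it is a three-way decision, and the unparenthesized `a || b && c` in the first condition relies on `&&` binding tighter than `||`. Below is a standalone C++ sketch of the same decision with the precedence spelled out; the enum and function names are illustrative, not server API.

    #include <cstdio>

    // Mirrors the DROP TABLE binlogging cases in mysql_rm_table_part2 above.
    enum class DropBinlog { ORIGINAL_QUERY, BUILT_QUERY, NOTHING };

    static DropBinlog drop_table_binlog_action(bool row_based,
                                               int non_temp_tables_count,
                                               bool tmp_table_deleted)
    {
      // Statement-based replication, or row-based where only non-temporary
      // tables were dropped: the original statement is safe to log as-is.
      if (!row_based || (non_temp_tables_count > 0 && !tmp_table_deleted))
        return DropBinlog::ORIGINAL_QUERY;
      // Row-based with a mix of temporary and non-temporary tables: log a
      // rebuilt statement naming only the non-temporary tables, since the
      // temporary ones never existed on the slave.
      if (non_temp_tables_count > 0 && tmp_table_deleted)
        return DropBinlog::BUILT_QUERY;
      // Row-based and only temporary tables (or nothing) were dropped:
      // the slave has nothing to drop, so log nothing.
      return DropBinlog::NOTHING;
    }

    int main()
    {
      // Row-based, one normal table plus a temporary table dropped:
      std::printf("%d\n",
                  (int) drop_table_binlog_action(true, 1, true)); // 1 == BUILT_QUERY
      return 0;
    }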
@@ -342,16 +771,20 @@
 }

-int quick_rm_table(enum db_type base,const char *db,
+bool quick_rm_table(handlerton *base,const char *db,
                    const char *table_name)
 {
   char path[FN_REFLEN];
-  int error=0;
-  build_table_path(path, sizeof(path), db, table_name, reg_ext);
+  bool error= 0;
+  DBUG_ENTER("quick_rm_table");
+
+  uint path_length= build_table_filename(path, sizeof(path),
+                                         db, table_name, reg_ext);
   if (my_delete(path,MYF(0)))
-    error=1; /* purecov: inspected */
-  *fn_ext(path)= 0;                     // Remove reg_ext
-  return ha_delete_table(current_thd, base, path, table_name, 0) || error;
+    error= 1; /* purecov: inspected */
+  path[path_length - reg_ext_length]= '\0'; // Remove reg_ext
+  DBUG_RETURN(ha_delete_table(current_thd, base, path, db, table_name, 0) ||
+              error);
 }

 /*
@@ -643,10 +1076,16 @@ int prepare_create_field(create_field *sql_field,

   SYNOPSIS
     mysql_prepare_table()
-    thd                 Thread object
-    create_info         Create information (like MAX_ROWS)
-    fields              List of fields to create
-    keys                List of keys to create
+    thd                       Thread object.
+    create_info               Create information (like MAX_ROWS).
+    fields                    List of fields to create.
+    keys                      List of keys to create.
+    tmp_table                 If a temporary table is to be created.
+    db_options          INOUT Table options (like HA_OPTION_PACK_RECORD).
+    file                      The handler for the new table.
+    key_info_buffer     OUT   An array of KEY structs for the indexes.
+    key_count           OUT   The number of elements in the array.
+    select_field_count        The number of fields coming from a select table.

   DESCRIPTION
     Prepares the table and key structures for table creation.
@@ -968,6 +1407,8 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,

   while ((key=key_iterator++))
   {
+    DBUG_PRINT("info", ("key name: '%s'  type: %d", key->name ? key->name :
+                        "(none)" , key->type));
     if (key->type == Key::FOREIGN_KEY)
     {
       fk_key_count++;
@@ -1028,7 +1469,7 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,
       key_parts+=key->columns.elements;
     else
       (*key_count)--;
-    if (key->name && !tmp_table &&
+    if (key->name && !tmp_table && (key->type != Key::PRIMARY) &&
        !my_strcasecmp(system_charset_info,key->name,primary_key_name))
     {
       my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key->name);
@@ -1042,7 +1483,7 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,
      DBUG_RETURN(-1);
    }

-  (*key_info_buffer) = key_info= (KEY*) sql_calloc(sizeof(KEY)* *key_count);
+  (*key_info_buffer)= key_info= (KEY*) sql_calloc(sizeof(KEY) * (*key_count));
   key_part_info=(KEY_PART_INFO*) sql_calloc(sizeof(KEY_PART_INFO)*key_parts);
   if (!*key_info_buffer || ! key_part_info)
     DBUG_RETURN(-1);                            // Out of memory
@@ -1070,6 +1511,8 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,
       break;
     case Key::FULLTEXT:
       key_info->flags= HA_FULLTEXT;
+      if ((key_info->parser_name= key->parser_name))
+        key_info->flags|= HA_USES_PARSER;
       break;
     case Key::SPATIAL:
 #ifdef HAVE_SPATIAL
@@ -1401,6 +1844,34 @@ static int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info,

 /*
+  Set table default charset, if not set
+
+  SYNOPSIS
+    set_table_default_charset()
+    create_info        Table create information
+
+  DESCRIPTION
+    If the table character set was not given explicitly,
+    let's fetch the database default character set and
+    apply it to the table.
+*/ + +static void set_table_default_charset(THD *thd, + HA_CREATE_INFO *create_info, char *db) +{ + if (!create_info->default_table_charset) + { + HA_CREATE_INFO db_info; + char path[FN_REFLEN]; + /* Abuse build_table_filename() to build the path to the db.opt file */ + build_table_filename(path, sizeof(path), db, "", MY_DB_OPT_FILE); + load_db_opt(thd, path, &db_info); + create_info->default_table_charset= db_info.default_table_charset; + } +} + + +/* Extend long VARCHAR fields to blob & prepare field if it's a blob SYNOPSIS @@ -1507,7 +1978,7 @@ void sp_prepare_create_field(THD *thd, create_field *sql_field) Create a table SYNOPSIS - mysql_create_table() + mysql_create_table_internal() thd Thread object db Database table_name Table name @@ -1530,19 +2001,21 @@ void sp_prepare_create_field(THD *thd, create_field *sql_field) TRUE error */ -bool mysql_create_table(THD *thd,const char *db, const char *table_name, - HA_CREATE_INFO *create_info, - List<create_field> &fields, - List<Key> &keys,bool internal_tmp_table, - uint select_field_count) +bool mysql_create_table_internal(THD *thd, + const char *db, const char *table_name, + HA_CREATE_INFO *create_info, + List<create_field> &fields, + List<Key> &keys,bool internal_tmp_table, + uint select_field_count) { char path[FN_REFLEN]; + uint path_length; const char *alias; uint db_options, key_count; KEY *key_info_buffer; handler *file; bool error= TRUE; - DBUG_ENTER("mysql_create_table"); + DBUG_ENTER("mysql_create_table_internal"); /* Check for duplicate fields and check type of table to create */ if (!fields.elements) @@ -1551,67 +2024,155 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, MYF(0)); DBUG_RETURN(TRUE); } - if (check_engine(thd, table_name, &create_info->db_type)) + if (check_engine(thd, table_name, create_info)) DBUG_RETURN(TRUE); db_options= create_info->table_options; if (create_info->row_type == ROW_TYPE_DYNAMIC) db_options|=HA_OPTION_PACK_RECORD; alias= table_case_name(create_info, table_name); - file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type); - -#ifdef NOT_USED - /* - if there is a technical reason for a handler not to have support - for temp. tables this code can be re-enabled. - Otherwise, if a handler author has a wish to prohibit usage of - temporary tables for his handler he should implement a check in - ::create() method - */ - if ((create_info->options & HA_LEX_CREATE_TMP_TABLE) && - (file->table_flags() & HA_NO_TEMP_TABLES)) + if (!(file=get_new_handler((TABLE_SHARE*) 0, thd->mem_root, + create_info->db_type))) { - my_error(ER_ILLEGAL_HA, MYF(0), table_name); + mem_alloc_error(sizeof(handler)); DBUG_RETURN(TRUE); } -#endif - - /* - If the table character set was not given explicitely, - let's fetch the database default character set and - apply it to the table. - */ - if (!create_info->default_table_charset) +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info= thd->lex->part_info; + if (!part_info && create_info->db_type->partition_flags && + (create_info->db_type->partition_flags() & HA_USE_AUTO_PARTITION)) { - HA_CREATE_INFO db_info; - char path[FN_REFLEN]; - /* Abuse build_table_path() to build the path to the db.opt file */ - build_table_path(path, sizeof(path), db, MY_DB_OPT_FILE, ""); - load_db_opt(thd, path, &db_info); - create_info->default_table_charset= db_info.default_table_charset; + /* + Table is not defined as a partitioned table but the engine handles + all tables as partitioned. 
The handler will set up the partition info + object with the default settings. + */ + thd->lex->part_info= part_info= new partition_info(); + if (!part_info) + { + mem_alloc_error(sizeof(partition_info)); + DBUG_RETURN(TRUE); + } + file->set_auto_partitions(part_info); } + if (part_info) + { + /* + The table has been specified as a partitioned table. + If this is part of an ALTER TABLE the handler will be the partition + handler but we need to specify the default handler to use for + partitions also in the call to check_partition_info. We transport + this information in the default_db_type variable, it is either + DB_TYPE_DEFAULT or the engine set in the ALTER TABLE command. + + Check that we don't use foreign keys in the table since it won't + work even with InnoDB beneath it. + */ + List_iterator<Key> key_iterator(keys); + Key *key; + handlerton *part_engine_type= create_info->db_type; + char *part_syntax_buf; + uint syntax_len; + handlerton *engine_type; + while ((key= key_iterator++)) + { + if (key->type == Key::FOREIGN_KEY) + { + my_error(ER_CANNOT_ADD_FOREIGN, MYF(0)); + goto err; + } + } + if (part_engine_type == &partition_hton) + { + /* + This only happens at ALTER TABLE. + default_engine_type was assigned from the engine set in the ALTER + TABLE command. + */ + ; + } + else + { + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + { + part_info->default_engine_type= create_info->db_type; + } + else + { + if (part_info->default_engine_type == NULL) + { + part_info->default_engine_type= ha_checktype(thd, + DB_TYPE_DEFAULT, 0, 0); + } + } + } + DBUG_PRINT("info", ("db_type = %d", + ha_legacy_type(part_info->default_engine_type))); + if (check_partition_info(part_info, &engine_type, file, + create_info->max_rows)) + goto err; + part_info->default_engine_type= engine_type; + + /* + We reverse the partitioning parser and generate a standard format + for syntax stored in frm file. + */ + if (!(part_syntax_buf= generate_partition_syntax(part_info, + &syntax_len, + TRUE, FALSE))) + goto err; + part_info->part_info_string= part_syntax_buf; + part_info->part_info_len= syntax_len; + if (create_info->db_type != engine_type) + { + delete file; + if (!(file= get_new_handler((TABLE_SHARE*) 0, thd->mem_root, engine_type))) + { + mem_alloc_error(sizeof(handler)); + DBUG_RETURN(TRUE); + } + } + if ((!(engine_type->partition_flags && + engine_type->partition_flags() & HA_CAN_PARTITION)) || + create_info->db_type == &partition_hton) + { + /* + The handler assigned to the table cannot handle partitioning. + Assign the partition handler as the handler of the table. 
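As an aside, the engine-selection rules handled in this hunk can be stated compactly. Below is a compilable toy model of them, assuming only the two partitioning capability flags seen above; Engine and effective_engine() are hypothetical stand-ins for handlerton and the surrounding logic, not server API:

#include <cassert>

enum { CAN_PARTITION = 1, USE_AUTO_PARTITION = 2 };

struct Engine { const char *name; unsigned partition_flags; };

static const Engine partition_engine = { "partition", 0 };

static const Engine *effective_engine(const Engine *requested,
                                      bool declared_partitioned)
{
  /* An auto-partitioning engine forces a partition_info even without
     a PARTITION BY clause in the statement. */
  bool partitioned = declared_partitioned ||
                     (requested->partition_flags & USE_AUTO_PARTITION);
  if (!partitioned)
    return requested;
  /* An engine that cannot partition natively gets the generic
     partition handler stacked on top of it. */
  if (!(requested->partition_flags & CAN_PARTITION))
    return &partition_engine;
  return requested;
}

int main()
{
  Engine myisam = { "MyISAM", 0 };
  assert(effective_engine(&myisam, true) == &partition_engine);
  assert(effective_engine(&myisam, false) == &myisam);
  return 0;
}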
+ */ + DBUG_PRINT("info", ("db_type: %d", + ha_legacy_type(create_info->db_type))); + delete file; + create_info->db_type= &partition_hton; + if (!(file= get_ha_partition(part_info))) + { + DBUG_RETURN(TRUE); + } + } + } +#endif + + set_table_default_charset(thd, create_info, (char*) db); if (mysql_prepare_table(thd, create_info, &fields, &keys, internal_tmp_table, &db_options, file, &key_info_buffer, &key_count, select_field_count)) - DBUG_RETURN(TRUE); + goto err; /* Check if table exists */ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - my_snprintf(path, sizeof(path), "%s%s%lx_%lx_%x%s", - mysql_tmpdir, tmp_file_prefix, current_pid, thd->thread_id, - thd->tmp_table++, reg_ext); + path_length= build_tmptable_filename(thd, path, sizeof(path)); if (lower_case_table_names) my_casedn_str(files_charset_info, path); create_info->table_options|=HA_CREATE_DELAY_KEY_WRITE; } else - build_table_path(path, sizeof(path), db, alias, reg_ext); + path_length= build_table_filename(path, sizeof(path), db, alias, reg_ext); /* Check if table already exists */ - if ((create_info->options & HA_LEX_CREATE_TMP_TABLE) - && find_temporary_table(thd,db,table_name)) + if ((create_info->options & HA_LEX_CREATE_TMP_TABLE) && + find_temporary_table(thd, db, table_name)) { if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS) { @@ -1619,13 +2180,14 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); - DBUG_RETURN(FALSE); + error= 0; + goto err; } my_error(ER_TABLE_EXISTS_ERROR, MYF(0), alias); - DBUG_RETURN(TRUE); + goto err; } if (wait_if_global_read_lock(thd, 0, 1)) - DBUG_RETURN(TRUE); + goto err; VOID(pthread_mutex_lock(&LOCK_open)); if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { @@ -1634,8 +2196,9 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_info->options & HA_LEX_CREATE_IF_NOT_EXISTS) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } + DBUG_ASSERT(get_cached_table_share(db, alias) == 0); } /* @@ -1658,7 +2221,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (create_if_not_exists) goto warn; my_error(ER_TABLE_EXISTS_ERROR,MYF(0),table_name); - goto end; + goto unlock_and_end; } } @@ -1669,32 +2232,42 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, create_info->data_file_name= create_info->index_file_name= 0; create_info->table_options=db_options; - if (rea_create_table(thd, path, db, table_name, - create_info, fields, key_count, - key_info_buffer)) - goto end; + path[path_length - reg_ext_length]= '\0'; // Remove .frm extension + if (rea_create_table(thd, path, db, table_name, create_info, fields, + key_count, key_info_buffer, file)) + goto unlock_and_end; + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { /* Open table and put in temporary table list */ if (!(open_temporary_table(thd, path, db, table_name, 1))) { (void) rm_temporary_table(create_info->db_type, path); - goto end; + goto unlock_and_end; } thd->tmp_table_used= 1; } - if (!internal_tmp_table && mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } - error= FALSE; -end: + /* + Don't write statement if: + - It is an internal temporary table, + - Row-based logging is used and we are creating a temporary table, or
+ - The binary log is not open. + Otherwise, the statement shall be binlogged. + */ + if (!internal_tmp_table && + (!thd->current_stmt_binlog_row_based || + (thd->current_stmt_binlog_row_based && + !(create_info->options & HA_LEX_CREATE_TMP_TABLE)))) + write_bin_log(thd, TRUE, thd->query, thd->query_length); + error= FALSE; +unlock_and_end: VOID(pthread_mutex_unlock(&LOCK_open)); start_waiting_global_read_lock(thd); + +err: thd->proc_info="After create"; + delete file; DBUG_RETURN(error); warn: @@ -1703,9 +2276,52 @@ warn: ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), alias); create_info->table_existed= 1; // Mark that table existed - goto end; + goto unlock_and_end; } + +/* + Database locking aware wrapper for mysql_create_table_internal(). +*/ + +bool mysql_create_table(THD *thd, const char *db, const char *table_name, + HA_CREATE_INFO *create_info, + List<create_field> &fields, + List<Key> &keys,bool internal_tmp_table, + uint select_field_count) +{ + bool result; + DBUG_ENTER("mysql_create_table"); + + /* Wait for any database locks */ + pthread_mutex_lock(&LOCK_lock_db); + while (!thd->killed && + hash_search(&lock_db_cache,(byte*) db, strlen(db))) + { + wait_for_condition(thd, &LOCK_lock_db, &COND_refresh); + pthread_mutex_lock(&LOCK_lock_db); + } + + if (thd->killed) + { + pthread_mutex_unlock(&LOCK_lock_db); + DBUG_RETURN(TRUE); + } + creating_table++; + pthread_mutex_unlock(&LOCK_lock_db); + + result= mysql_create_table_internal(thd, db, table_name, create_info, + fields, keys, internal_tmp_table, + select_field_count); + + pthread_mutex_lock(&LOCK_lock_db); + if (!--creating_table && creating_database) + pthread_cond_signal(&COND_refresh); + pthread_mutex_unlock(&LOCK_lock_db); + DBUG_RETURN(result); +} + + /* ** Give the key name after the first field with an optional '_#' after **/ @@ -1757,6 +2373,7 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, MYSQL_LOCK **lock) { TABLE tmp_table; // Used during 'create_field()' + TABLE_SHARE share; TABLE *table= 0; uint select_field_count= items->elements; /* Add selected items to field list */ @@ -1768,11 +2385,14 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, tmp_table.alias= 0; tmp_table.timestamp_field= 0; - tmp_table.s= &tmp_table.share_not_to_be_used; + tmp_table.s= &share; + init_tmp_table_share(&share, "", 0, "", ""); + tmp_table.s->db_create_options=0; tmp_table.s->blob_ptr_size= portable_sizeof_char_ptr; - tmp_table.s->db_low_byte_first= test(create_info->db_type == DB_TYPE_MYISAM || - create_info->db_type == DB_TYPE_HEAP); + tmp_table.s->db_low_byte_first= + test(create_info->db_type == &myisam_hton || + create_info->db_type == &heap_hton); tmp_table.null_row=tmp_table.maybe_null=0; while ((item=it++)) @@ -1849,7 +2469,7 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info, ****************************************************************************/ bool -mysql_rename_table(enum db_type base, +mysql_rename_table(handlerton *base, const char *old_db, const char *old_name, const char *new_db, @@ -1859,13 +2479,15 @@ mysql_rename_table(enum db_type base, char from[FN_REFLEN], to[FN_REFLEN], lc_from[FN_REFLEN], lc_to[FN_REFLEN]; char *from_base= from, *to_base= to; char tmp_name[NAME_LEN+1]; - handler *file= (base == DB_TYPE_UNKNOWN ?
0 : - get_new_handler((TABLE*) 0, thd->mem_root, base)); + handler *file; int error=0; DBUG_ENTER("mysql_rename_table"); - build_table_path(from, sizeof(from), old_db, old_name, ""); - build_table_path(to, sizeof(to), new_db, new_name, ""); + file= (base == NULL ? 0 : + get_new_handler((TABLE_SHARE*) 0, thd->mem_root, base)); + + build_table_filename(from, sizeof(from), old_db, old_name, ""); + build_table_filename(to, sizeof(to), new_db, new_name, ""); /* If lower_case_table_names == 2 (case-preserving but case-insensitive @@ -1877,12 +2499,12 @@ mysql_rename_table(enum db_type base, { strmov(tmp_name, old_name); my_casedn_str(files_charset_info, tmp_name); - build_table_path(lc_from, sizeof(lc_from), old_db, tmp_name, ""); + build_table_filename(lc_from, sizeof(lc_from), old_db, tmp_name, ""); from_base= lc_from; strmov(tmp_name, new_name); my_casedn_str(files_charset_info, tmp_name); - build_table_path(lc_to, sizeof(lc_to), new_db, tmp_name, ""); + build_table_filename(lc_to, sizeof(lc_to), new_db, tmp_name, ""); to_base= lc_to; } @@ -1924,17 +2546,19 @@ mysql_rename_table(enum db_type base, static void wait_while_table_is_used(THD *thd,TABLE *table, enum ha_extra_function function) { - DBUG_PRINT("enter",("table: %s", table->s->table_name)); DBUG_ENTER("wait_while_table_is_used"); - safe_mutex_assert_owner(&LOCK_open); + DBUG_PRINT("enter", ("table: '%s' share: 0x%lx db_stat: %u version: %u", + table->s->table_name.str, (ulong) table->s, + table->db_stat, table->s->version)); VOID(table->file->extra(function)); /* Mark all tables that are in use as 'old' */ - mysql_lock_abort(thd, table); // end threads waiting on lock + mysql_lock_abort(thd, table, TRUE); /* end threads waiting on lock */ /* Wait until all there are no other threads that has this table open */ - remove_table_from_cache(thd, table->s->db, - table->s->table_name, RTFC_WAIT_OTHER_THREAD_FLAG); + remove_table_from_cache(thd, table->s->db.str, + table->s->table_name.str, + RTFC_WAIT_OTHER_THREAD_FLAG); DBUG_VOID_RETURN; } @@ -2005,23 +2629,21 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table, else { char* backup_dir= thd->lex->backup_dir; - char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char src_path[FN_REFLEN], dst_path[FN_REFLEN], uname[FN_REFLEN]; char* table_name= table->table_name; char* db= table->db; - if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, - reg_ext)) + VOID(tablename_to_filename(table->table_name, uname, sizeof(uname))); + + if (fn_format_relative_to_data_home(src_path, uname, backup_dir, reg_ext)) DBUG_RETURN(-1); // protect buffer overflow - my_snprintf(dst_path, sizeof(dst_path), "%s%s/%s", - mysql_real_data_home, db, table_name); + build_table_filename(dst_path, sizeof(dst_path), db, table_name, reg_ext); if (lock_and_wait_for_table_name(thd,table)) DBUG_RETURN(-1); - if (my_copy(src_path, - fn_format(dst_path, dst_path,"", reg_ext, 4), - MYF(MY_WME))) + if (my_copy(src_path, dst_path, MYF(MY_WME))) { pthread_mutex_lock(&LOCK_open); unlock_table_name(thd, table); @@ -2056,11 +2678,15 @@ static int prepare_for_restore(THD* thd, TABLE_LIST* table, } -static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, +static int prepare_for_repair(THD *thd, TABLE_LIST *table_list, HA_CHECK_OPT *check_opt) { int error= 0; TABLE tmp_table, *table; + TABLE_SHARE *share; + char from[FN_REFLEN],tmp[FN_REFLEN+32]; + const char **ext; + MY_STAT stat_info; DBUG_ENTER("prepare_for_repair"); if (!(check_opt->sql_flags & TT_USEFRM)) @@ -2068,12 +2694,26 @@ static int 
prepare_for_repair(THD* thd, TABLE_LIST *table_list, if (!(table= table_list->table)) /* if open_ltable failed */ { - char name[FN_REFLEN]; - build_table_path(name, sizeof(name), table_list->db, - table_list->table_name, ""); - if (openfrm(thd, name, "", 0, 0, 0, &tmp_table)) + char key[MAX_DBKEY_LENGTH]; + uint key_length; + + key_length= create_table_def_key(thd, key, table_list, 0); + pthread_mutex_lock(&LOCK_open); + if (!(share= (get_table_share(thd, table_list, key, key_length, 0, + &error)))) + { + pthread_mutex_unlock(&LOCK_open); DBUG_RETURN(0); // Can't open frm file + } + + if (open_table_from_share(thd, share, "", 0, 0, 0, &tmp_table, FALSE)) + { + release_table_share(share, RELEASE_NORMAL); + pthread_mutex_unlock(&LOCK_open); + DBUG_RETURN(0); // Out of memory + } table= &tmp_table; + pthread_mutex_unlock(&LOCK_open); } /* @@ -2086,18 +2726,16 @@ static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, - Run a normal repair using the new index file and the old data file */ - char from[FN_REFLEN],tmp[FN_REFLEN+32]; - const char **ext= table->file->bas_ext(); - MY_STAT stat_info; - /* Check if this is a table type that stores index and data separately, like ISAM or MyISAM */ + ext= table->file->bas_ext(); if (!ext[0] || !ext[1]) goto end; // No data file - strxmov(from, table->s->path, ext[1], NullS); // Name of data file + // Name of data file + strxmov(from, table->s->normalized_path.str, ext[1], NullS); if (!my_stat(from, &stat_info, MYF(0))) goto end; // Can't use USE_FRM flag @@ -2161,7 +2799,11 @@ static int prepare_for_repair(THD* thd, TABLE_LIST *table_list, end: if (table == &tmp_table) - closefrm(table); // Free allocated memory + { + pthread_mutex_lock(&LOCK_open); + closefrm(table, 1); // Free allocated memory + pthread_mutex_unlock(&LOCK_open); + } DBUG_RETURN(error); } @@ -2318,14 +2960,15 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } /* Close all instances of the table to allow repair to rename files */ - if (lock_type == TL_WRITE && table->table->s->version) + if (lock_type == TL_WRITE && table->table->s->version && + !table->table->s->log_table) { pthread_mutex_lock(&LOCK_open); const char *old_message=thd->enter_cond(&COND_refresh, &LOCK_open, "Waiting to get writelock"); - mysql_lock_abort(thd,table->table); - remove_table_from_cache(thd, table->table->s->db, - table->table->s->table_name, + mysql_lock_abort(thd,table->table, TRUE); + remove_table_from_cache(thd, table->table->s->db.str, + table->table->s->table_name.str, RTFC_WAIT_OTHER_THREAD_FLAG | RTFC_CHECK_KILLED_FLAG); thd->exit_cond(old_message); @@ -2516,13 +3159,14 @@ send_result_message: } if (table->table) { + /* in the below check we do not refresh the log tables */ if (fatal_error) table->table->s->version=0; // Force close of table - else if (open_for_modify) + else if (open_for_modify && !table->table->s->log_table) { pthread_mutex_lock(&LOCK_open); - remove_table_from_cache(thd, table->table->s->db, - table->table->s->table_name, RTFC_NO_FLAG); + remove_table_from_cache(thd, table->table->s->db.str, + table->table->s->table_name.str, RTFC_NO_FLAG); pthread_mutex_unlock(&LOCK_open); /* Something may be modified, that's why we have to invalidate cache */ query_cache_invalidate3(thd, table->table, 0); @@ -2700,15 +3344,16 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, HA_CREATE_INFO *create_info, Table_ident *table_ident) { - TABLE **tmp_table; - char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + TABLE *tmp_table; + char src_path[FN_REFLEN], 
dst_path[FN_REFLEN], tmp_path[FN_REFLEN]; + uint dst_path_length; char *db= table->db; char *table_name= table->table_name; char *src_db; char *src_table= table_ident->table.str; int err; bool res= TRUE; - db_type not_used; + enum legacy_db_type not_used; TABLE_LIST src_tables_list; DBUG_ENTER("mysql_create_like_table"); @@ -2738,13 +3383,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, goto err; if ((tmp_table= find_temporary_table(thd, src_db, src_table))) - strxmov(src_path, (*tmp_table)->s->path, reg_ext, NullS); + strxmov(src_path, tmp_table->s->path.str, reg_ext, NullS); else { - strxmov(src_path, mysql_data_home, "/", src_db, "/", src_table, - reg_ext, NullS); + build_table_filename(src_path, sizeof(src_path), + src_db, src_table, reg_ext); /* Resolve symlinks (for windows) */ - fn_format(src_path, src_path, "", "", MYF(MY_UNPACK_FILENAME)); + unpack_filename(src_path, src_path); if (lower_case_table_names) my_casedn_str(files_charset_info, src_path); if (access(src_path, F_OK)) @@ -2773,18 +3418,15 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, { if (find_temporary_table(thd, db, table_name)) goto table_exists; - my_snprintf(dst_path, sizeof(dst_path), "%s%s%lx_%lx_%x%s", - mysql_tmpdir, tmp_file_prefix, current_pid, - thd->thread_id, thd->tmp_table++, reg_ext); + dst_path_length= build_tmptable_filename(thd, dst_path, sizeof(dst_path)); if (lower_case_table_names) my_casedn_str(files_charset_info, dst_path); create_info->table_options|= HA_CREATE_DELAY_KEY_WRITE; } else { - strxmov(dst_path, mysql_data_home, "/", db, "/", table_name, - reg_ext, NullS); - fn_format(dst_path, dst_path, "", "", MYF(MY_UNPACK_FILENAME)); + dst_path_length= build_table_filename(dst_path, sizeof(dst_path), + db, table_name, reg_ext); if (!access(dst_path, F_OK)) goto table_exists; } @@ -2806,8 +3448,21 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, creation, instead create the table directly (for both normal and temporary tables). */ - *fn_ext(dst_path)= 0; - err= ha_create_table(dst_path, create_info, 1); +#ifdef WITH_PARTITION_STORAGE_ENGINE + /* + For partitioned tables we need to copy the .par file as well since + it is used in open_table_def to even be able to create a new handler. + There is no way to find out here if the original table is a + partitioned table so we copy the file and ignore any errors. + */ + fn_format(tmp_path, dst_path, reg_ext, ".par", MYF(MY_REPLACE_EXT)); + strmov(dst_path, tmp_path); + fn_format(tmp_path, src_path, reg_ext, ".par", MYF(MY_REPLACE_EXT)); + strmov(src_path, tmp_path); + my_copy(src_path, dst_path, MYF(MY_DONT_OVERWRITE_FILE)); +#endif + dst_path[dst_path_length - reg_ext_length]= '\0'; // Remove .frm + err= ha_create_table(thd, dst_path, db, table_name, create_info, 1); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { @@ -2825,13 +3480,63 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, goto err; /* purecov: inspected */ } - // Must be written before unlock - if (mysql_bin_log.is_open()) + /* + We have to write the query before we unlock the tables. + */ + if (thd->current_stmt_binlog_row_based) { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); + /* + Since temporary tables are not replicated under row-based + replication, CREATE TABLE ... LIKE ... needs special + treatment.
We have four cases to consider, according to the + following decision table: + + ==== ========= ========= ============================== + Case Target Source Write to binary log + ==== ========= ========= ============================== + 1 normal normal Original statement + 2 normal temporary Generated statement + 3 temporary normal Nothing + 4 temporary temporary Nothing + ==== ========= ========= ============================== + + The variable 'tmp_table' below is used to see if the source + table is a temporary table: if it is set, then the source table + was a temporary table and we can take appropriate actions. + */ + if (!(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + { + if (tmp_table) // Case 2 + { + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + TABLE *table_ptr; + int error; + + /* + Let's open and lock the table: it will be closed (and + unlocked) by close_thread_tables() at the end of the + statement anyway. + */ + if (!(table_ptr= open_ltable(thd, table, TL_READ_NO_INSERT))) + goto err; + + int result= store_create_info(thd, table, &query, create_info); + + DBUG_ASSERT(result == 0); // store_create_info() always returns 0 + write_bin_log(thd, TRUE, query.ptr(), query.length()); + } + else // Case 1 + write_bin_log(thd, TRUE, thd->query, thd->query_length); + } + /* + Cases 3 and 4 do nothing under RBR + */ } + else + write_bin_log(thd, TRUE, thd->query, thd->query_length); + res= FALSE; goto err; @@ -2937,11 +3642,7 @@ mysql_discard_or_import_tablespace(THD *thd, error=1; if (error) goto err; - if (mysql_bin_log.is_open()) - { - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, FALSE, thd->query, thd->query_length); err: close_thread_tables(thd); thd->tablespace_op=FALSE; @@ -2958,206 +3659,218 @@ err: } -#ifdef NOT_USED /* - CREATE INDEX and DROP INDEX are implemented by calling ALTER TABLE with - the proper arguments. This isn't very fast but it should work for most - cases. - One should normally create all indexes with CREATE TABLE or ALTER TABLE. + SYNOPSIS + compare_tables() + table The original table. + create_list The fields for the new table. + key_info_buffer An array of KEY structs for the new indexes. + key_count The number of elements in the array. + create_info Create options for the new table. + alter_info Alter options. + order_num Number of order list elements. + index_drop_buffer OUT An array of offsets into table->key_info. + index_drop_count OUT The number of elements in the array. + index_add_buffer OUT An array of offsets into key_info_buffer. + index_add_count OUT The number of elements in the array. + + DESCRIPTION + 'table' (first argument) contains information of the original + table, which includes all corresponding parts that the new + table has in arguments create_list, key_list and create_info. + + By comparing the changes between the original and new table + we can determine how much it has changed after ALTER TABLE + and whether we need to make a copy of the table, or just change + the .frm file. + + If there are no data changes, but index changes, 'index_drop_buffer' + and/or 'index_add_buffer' are populated with offsets into + table->key_info or key_info_buffer respectively for the indexes + that need to be dropped and/or (re-)created.
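Returning briefly to the CREATE TABLE ... LIKE decision table earlier in this hunk: it maps one-to-one onto a small function. A sketch only; the enum and function names are illustrative, not server API:

#include <cassert>

enum like_binlog_action { ORIGINAL_STATEMENT, GENERATED_STATEMENT, NOTHING };

static like_binlog_action
create_like_binlog_action(bool row_based, bool target_is_tmp,
                          bool source_is_tmp)
{
  if (!row_based)
    return ORIGINAL_STATEMENT;               /* SBR logs the original query */
  if (target_is_tmp)
    return NOTHING;                          /* cases 3 and 4 */
  return source_is_tmp ? GENERATED_STATEMENT /* case 2 */
                       : ORIGINAL_STATEMENT; /* case 1 */
}

int main()
{
  assert(create_like_binlog_action(true, false, true) == GENERATED_STATEMENT);
  assert(create_like_binlog_action(true, true, true) == NOTHING);
  return 0;
}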
+ + RETURN VALUES + 0 No copy needed + ALTER_TABLE_DATA_CHANGED Data changes, copy needed + ALTER_TABLE_INDEX_CHANGED Index changes, copy might be needed */ -int mysql_create_indexes(THD *thd, TABLE_LIST *table_list, List<Key> &keys) +static uint compare_tables(TABLE *table, List<create_field> *create_list, + KEY *key_info_buffer, uint key_count, + HA_CREATE_INFO *create_info, + ALTER_INFO *alter_info, uint order_num, + uint *index_drop_buffer, uint *index_drop_count, + uint *index_add_buffer, uint *index_add_count) { - List<create_field> fields; - List<Alter_drop> drop; - List<Alter_column> alter; - HA_CREATE_INFO create_info; - int rc; - uint idx; - uint db_options; - uint key_count; - TABLE *table; - Field **f_ptr; - KEY *key_info_buffer; - char path[FN_REFLEN+1]; - DBUG_ENTER("mysql_create_index"); + Field **f_ptr, *field; + uint changes= 0, tmp; + List_iterator_fast<create_field> new_field_it(*create_list); + create_field *new_field; + DBUG_ENTER("compare_tables"); /* - Try to use online generation of index. - This requires that all indexes can be created online. - Otherwise, the old alter table procedure is executed. - - Open the table to have access to the correct table handler. + Some very basic checks. If number of fields changes, or the + handler, we need to run full ALTER TABLE. In the future + new fields can be added and old dropped without copy, but + not yet. + + Test also that engine was not given during ALTER TABLE, or + we are forced to run regular alter table (copy). + E.g. ALTER TABLE tbl_name ENGINE=MyISAM. + + For the following ones we also want to run regular alter table: + ALTER TABLE tbl_name ORDER BY .. + ALTER TABLE tbl_name CONVERT TO CHARACTER SET .. + + At the moment we can't handle altering temporary tables without a copy. + We also test if OPTIMIZE TABLE was given and was mapped to alter table. + In that case we always do full copy. */ - if (!(table=open_ltable(thd,table_list,TL_WRITE_ALLOW_READ))) - DBUG_RETURN(-1); + if (table->s->fields != create_list->elements || + table->s->db_type != create_info->db_type || + table->s->tmp_table || + create_info->used_fields & HA_CREATE_USED_ENGINE || + create_info->used_fields & HA_CREATE_USED_CHARSET || + create_info->used_fields & HA_CREATE_USED_DEFAULT_CHARSET || + (alter_info->flags & ALTER_RECREATE) || + order_num) + DBUG_RETURN(ALTER_TABLE_DATA_CHANGED); /* - The add_index method takes an array of KEY structs for the new indexes. - Preparing a new table structure generates this array. - It needs a list with all fields of the table, which does not need to - be correct in every respect. The field names are important. + Go through fields and check if the original ones are compatible + with new table. */ - for (f_ptr= table->field; *f_ptr; f_ptr++) + for (f_ptr= table->field, new_field= new_field_it++; + (field= *f_ptr); f_ptr++, new_field= new_field_it++) { - create_field *c_fld= new create_field(*f_ptr, *f_ptr); - c_fld->unireg_check= Field::NONE; /*avoid multiple auto_increments*/ - fields.push_back(c_fld); + /* Make sure we have at least the default charset in use. */ + if (!new_field->charset) + new_field->charset= create_info->default_table_charset; + + /* Check that NULL behavior is the same for old and new fields */ + if ((new_field->flags & NOT_NULL_FLAG) != + (uint) (field->flags & NOT_NULL_FLAG)) + DBUG_RETURN(ALTER_TABLE_DATA_CHANGED); + + /* Don't pack rows in old tables if the user has requested this.
*/ + if (create_info->row_type == ROW_TYPE_DYNAMIC || + (new_field->flags & BLOB_FLAG) || + new_field->sql_type == MYSQL_TYPE_VARCHAR && + create_info->row_type != ROW_TYPE_FIXED) + create_info->table_options|= HA_OPTION_PACK_RECORD; + + /* Evaluate changes bitmap and send to check_if_incompatible_data() */ + if (!(tmp= field->is_equal(new_field))) + DBUG_RETURN(ALTER_TABLE_DATA_CHANGED); + // Clear indexed marker + field->add_index= 0; + changes|= tmp; } - bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; - create_info.default_table_charset= thd->variables.collation_database; - db_options= 0; - if (mysql_prepare_table(thd, &create_info, &fields, - &keys, /*tmp_table*/ 0, &db_options, table->file, - &key_info_buffer, key_count, - /*select_field_count*/ 0)) - DBUG_RETURN(-1); /* - Check if all keys can be generated with the add_index method. - If anyone cannot, then take the old way. + Go through keys and check if the original ones are compatible + with new table. */ - for (idx=0; idx< key_count; idx++) - { - DBUG_PRINT("info", ("creating index %s", key_info_buffer[idx].name)); - if (!(table->file->index_ddl_flags(key_info_buffer+idx)& - (HA_DDL_ONLINE| HA_DDL_WITH_LOCK))) - break ; - } - if ((idx < key_count)|| !key_count) - { - /* Re-initialize the create_info, which was changed by prepare table. */ - bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; - create_info.default_table_charset= thd->variables.collation_database; - /* Cleanup the fields list. We do not want to create existing fields. */ - fields.delete_elements(); - if (real_alter_table(thd, table_list->db, table_list->table_name, - &create_info, table_list, table, - fields, keys, drop, alter, 0, (ORDER*)0, - ALTER_ADD_INDEX, DUP_ERROR)) - /* Don't need to free((gptr) key_info_buffer);*/ - DBUG_RETURN(-1); - } - else - { - if (table->file->add_index(table, key_info_buffer, key_count)|| - build_table_path(path, sizeof(path), table_list->db, - (lower_case_table_names == 2) ? - table_list->alias : table_list->table_name, - reg_ext) == 0 || - mysql_create_frm(thd, path, &create_info, - fields, key_count, key_info_buffer, table->file)) - /* don't need to free((gptr) key_info_buffer);*/ - DBUG_RETURN(-1); - } - /* don't need to free((gptr) key_info_buffer);*/ - DBUG_RETURN(0); -} - - -int mysql_drop_indexes(THD *thd, TABLE_LIST *table_list, - List<Alter_drop> &drop) -{ - List<create_field> fields; - List<Key> keys; - List<Alter_column> alter; - HA_CREATE_INFO create_info; - uint idx; - uint db_options; - uint key_count; - uint *key_numbers; - TABLE *table; - Field **f_ptr; - KEY *key_info; - KEY *key_info_buffer; - char path[FN_REFLEN]; - DBUG_ENTER("mysql_drop_index"); + KEY *table_key; + KEY *table_key_end= table->key_info + table->s->keys; + KEY *new_key; + KEY *new_key_end= key_info_buffer + key_count; + DBUG_PRINT("info", ("index count old: %d new: %d", + table->s->keys, key_count)); /* - Try to use online generation of index. - This requires that all indexes can be created online. - Otherwise, the old alter table procedure is executed. - - Open the table to have access to the correct table handler. + Step through all keys of the old table and search matching new keys. 
*/ - if (!(table=open_ltable(thd,table_list,TL_WRITE_ALLOW_READ))) - DBUG_RETURN(-1); + *index_drop_count= 0; + *index_add_count= 0; + for (table_key= table->key_info; table_key < table_key_end; table_key++) + { + KEY_PART_INFO *table_part; + KEY_PART_INFO *table_part_end= table_key->key_part + table_key->key_parts; + KEY_PART_INFO *new_part; - /* - The drop_index method takes an array of key numbers. - It cannot get more entries than keys in the table. - */ - key_numbers= (uint*) thd->alloc(sizeof(uint*)*table->keys); - key_count= 0; + /* Search a new key with the same name. */ + for (new_key= key_info_buffer; new_key < new_key_end; new_key++) + { + if (! strcmp(table_key->name, new_key->name)) + break; + } + if (new_key >= new_key_end) + { + /* Key not found. Add the offset of the key to the drop buffer. */ + index_drop_buffer[(*index_drop_count)++]= table_key - table->key_info; + DBUG_PRINT("info", ("index dropped: '%s'", table_key->name)); + continue; + } + + /* Check that the key types are compatible between old and new tables. */ + if ((table_key->algorithm != new_key->algorithm) || + ((table_key->flags & HA_KEYFLAG_MASK) != + (new_key->flags & HA_KEYFLAG_MASK)) || + (table_key->key_parts != new_key->key_parts)) + goto index_changed; + + /* + Check that the key parts remain compatible between the old and + new tables. + */ + for (table_part= table_key->key_part, new_part= new_key->key_part; + table_part < table_part_end; + table_part++, new_part++) + { + /* + Key definition has changed if we are using a different field or + if the used key part length is different. We know that the fields + did not change. Comparing field numbers is sufficient. + */ + if ((table_part->length != new_part->length) || + (table_part->fieldnr - 1 != new_part->fieldnr)) + goto index_changed; + } + continue; + + index_changed: + /* Key modified. Add the offset of the key to both buffers. */ + index_drop_buffer[(*index_drop_count)++]= table_key - table->key_info; + index_add_buffer[(*index_add_count)++]= new_key - key_info_buffer; + field= table->field[new_key->key_part->fieldnr]; + // Mark field to be part of new key + field->add_index= 1; + DBUG_PRINT("info", ("index changed: '%s'", table_key->name)); + } + /*end of for (; table_key < table_key_end;) */ /* - Get the number of each key and check if it can be created online. + Step through all keys of the new table and find matching old keys. */ - List_iterator<Alter_drop> drop_it(drop); - Alter_drop *drop_key; - while ((drop_key= drop_it++)) + for (new_key= key_info_buffer; new_key < new_key_end; new_key++) { - /* Find the key in the table. */ - key_info=table->key_info; - for (idx=0; idx< table->keys; idx++, key_info++) + /* Search an old key with the same name. */ + for (table_key= table->key_info; table_key < table_key_end; table_key++) { - if (!my_strcasecmp(system_charset_info, key_info->name, drop_key->name)) - break; + if (! strcmp(table_key->name, new_key->name)) + break; } - if (idx>= table->keys) + if (table_key >= table_key_end) { - my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), drop_key->name); - /*don't need to free((gptr) key_numbers);*/ - DBUG_RETURN(-1); + /* Key not found. Add the offset of the key to the add buffer. */ + index_add_buffer[(*index_add_count)++]= new_key - key_info_buffer; + field= table->field[new_key->key_part->fieldnr]; + // Mark field to be part of new key + field->add_index= 1; + DBUG_PRINT("info", ("index added: '%s'", new_key->name)); } - /* - Check if the key can be generated with the add_index method. 
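Abstracted away from the server structs, the two matching passes above (old keys against new, then new against old) amount to the routine below. KeyDef is a hypothetical stand-in for KEY, and the flags/parts comparison stands in for the algorithm, flag-mask and key_parts checks in the patch:

#include <cstddef>
#include <string>
#include <vector>

struct KeyDef { std::string name; unsigned flags; std::vector<int> parts; };

static void diff_keys(const std::vector<KeyDef> &old_keys,
                      const std::vector<KeyDef> &new_keys,
                      std::vector<std::size_t> &drop,
                      std::vector<std::size_t> &add)
{
  /* Pass 1: every old key without an identical same-name new key is
     dropped; a modified key is dropped and re-added. */
  for (std::size_t i = 0; i < old_keys.size(); i++)
  {
    std::size_t j;
    for (j = 0; j < new_keys.size(); j++)
      if (new_keys[j].name == old_keys[i].name)
        break;
    if (j == new_keys.size())
      drop.push_back(i);                       /* key was removed */
    else if (new_keys[j].flags != old_keys[i].flags ||
             new_keys[j].parts != old_keys[i].parts)
    {
      drop.push_back(i);                       /* key was modified: */
      add.push_back(j);                        /* drop old, add new  */
    }
  }
  /* Pass 2: every new key without a same-name old key is added. */
  for (std::size_t j = 0; j < new_keys.size(); j++)
  {
    std::size_t i;
    for (i = 0; i < old_keys.size(); i++)
      if (old_keys[i].name == new_keys[j].name)
        break;
    if (i == old_keys.size())
      add.push_back(j);                        /* key is new */
  }
}

int main()
{
  std::vector<KeyDef> o{ { "PRIMARY", 1, { 0 } } }, n;
  std::vector<std::size_t> drop, add;
  diff_keys(o, n, drop, add);
  return (drop.size() == 1 && add.empty()) ? 0 : 1;
}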
- If anyone cannot, then take the old way. - */ - DBUG_PRINT("info", ("dropping index %s", table->key_info[idx].name)); - if (!(table->file->index_ddl_flags(table->key_info+idx)& - (HA_DDL_ONLINE| HA_DDL_WITH_LOCK))) - break ; - key_numbers[key_count++]= idx; } - bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; - create_info.default_table_charset= thd->variables.collation_database; + /* Check if changes are compatible with current handler without a copy */ + if (table->file->check_if_incompatible_data(create_info, changes)) + DBUG_RETURN(ALTER_TABLE_DATA_CHANGED); - if ((drop_key)|| (drop.elements<= 0)) - { - if (real_alter_table(thd, table_list->db, table_list->table_name, - &create_info, table_list, table, - fields, keys, drop, alter, 0, (ORDER*)0, - ALTER_DROP_INDEX, DUP_ERROR)) - /*don't need to free((gptr) key_numbers);*/ - DBUG_RETURN(-1); - } - else - { - db_options= 0; - if (table->file->drop_index(table, key_numbers, key_count)|| - mysql_prepare_table(thd, &create_info, &fields, - &keys, /*tmp_table*/ 0, &db_options, table->file, - &key_info_buffer, key_count, - /*select_field_count*/ 0)|| - build_table_path(path, sizeof(path), table_list->db, - (lower_case_table_names == 2) ? - table_list->alias : table_list->table_name, - reg_ext) == 0 || - mysql_create_frm(thd, path, &create_info, - fields, key_count, key_info_buffer, table->file)) - /*don't need to free((gptr) key_numbers);*/ - DBUG_RETURN(-1); - } + if (*index_drop_count || *index_add_count) + DBUG_RETURN(ALTER_TABLE_INDEX_CHANGED); - /*don't need to free((gptr) key_numbers);*/ - DBUG_RETURN(0); + DBUG_RETURN(0); // Tables are compatible } -#endif /* NOT_USED */ /* @@ -3177,22 +3890,39 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, char tmp_name[80],old_name[32],new_name_buff[FN_REFLEN]; char new_alias_buff[FN_REFLEN], *table_name, *db, *new_alias, *alias; char index_file[FN_REFLEN], data_file[FN_REFLEN]; + char reg_path[FN_REFLEN+1]; ha_rows copied,deleted; ulonglong next_insert_id; uint db_create_options, used_fields; - enum db_type old_db_type,new_db_type; - bool need_copy_table; + handlerton *old_db_type, *new_db_type; + uint need_copy_table= 0; +#ifdef WITH_PARTITION_STORAGE_ENGINE + uint fast_alter_partition= 0; + bool partition_changed= FALSE; +#endif + List<create_field> prepared_create_list; + List<Key> prepared_key_list; + bool need_lock_for_indexes= TRUE; + uint db_options= 0; + uint key_count; + KEY *key_info_buffer; + uint index_drop_count; + uint *index_drop_buffer; + uint index_add_count; + uint *index_add_buffer; + bool committed= 0; DBUG_ENTER("mysql_alter_table"); thd->proc_info="init"; table_name=table_list->table_name; alias= (lower_case_table_names == 2) ? 
table_list->alias : table_name; - db=table_list->db; if (!new_db || !my_strcasecmp(table_alias_charset, new_db, db)) new_db= db; + build_table_filename(reg_path, sizeof(reg_path), db, table_name, reg_ext); + used_fields=create_info->used_fields; - + mysql_ha_flush(thd, table_list, MYSQL_HA_CLOSE_FINAL, FALSE); /* DISCARD/IMPORT TABLESPACE is always alone in an ALTER TABLE */ @@ -3227,7 +3957,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } else { - if (table->s->tmp_table) + if (table->s->tmp_table != NO_TMP_TABLE) { if (find_temporary_table(thd,new_db,new_name_buff)) { @@ -3238,7 +3968,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else { char dir_buff[FN_REFLEN]; - strxnmov(dir_buff, FN_REFLEN, mysql_real_data_home, new_db, NullS); + strxnmov(dir_buff, sizeof(dir_buff)-1, + mysql_real_data_home, new_db, NullS); if (!access(fn_format(new_name_buff,new_name_buff,dir_buff,reg_ext,0), F_OK)) { @@ -3256,15 +3987,25 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } old_db_type= table->s->db_type; - if (create_info->db_type == DB_TYPE_DEFAULT) + if (create_info->db_type == (handlerton*) &default_hton) create_info->db_type= old_db_type; - if (check_engine(thd, new_name, &create_info->db_type)) + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (prep_alter_part_table(thd, table, alter_info, create_info, old_db_type, + &partition_changed, &fast_alter_partition)) + { + DBUG_RETURN(TRUE); + } +#endif + if (check_engine(thd, new_name, create_info)) DBUG_RETURN(TRUE); new_db_type= create_info->db_type; if (create_info->row_type == ROW_TYPE_NOT_USED) create_info->row_type= table->s->row_type; - DBUG_PRINT("info", ("old type: %d new type: %d", old_db_type, new_db_type)); + DBUG_PRINT("info", ("old type: %s new type: %s", + ha_resolve_storage_engine_name(old_db_type), + ha_resolve_storage_engine_name(new_db_type))); if (ha_check_storage_engine_flag(old_db_type, HTON_ALTER_NOT_SUPPORTED) || ha_check_storage_engine_flag(new_db_type, HTON_ALTER_NOT_SUPPORTED)) { @@ -3292,6 +4033,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else { *fn_ext(new_name)=0; + table->s->version= 0; // Force removal of table def close_cached_table(thd, table); if (mysql_rename_table(old_db_type,db,table_name,new_db,new_alias)) error= -1; @@ -3337,12 +4079,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } if (!error) { - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + write_bin_log(thd, TRUE, thd->query, thd->query_length); if (do_send_ok) send_ok(thd); } @@ -3423,8 +4160,8 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, def_it.remove(); } } - else - { // Use old field value + else // This field was not dropped and not changed, add it to the list + { // for the new table. create_list.push_back(def=new create_field(field,field)); alter_it.rewind(); // Change default if ALTER Alter_column *alter; @@ -3579,7 +4316,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, key_name, key_info->algorithm, test(key_info->flags & HA_GENERATED_KEY), - key_parts)); + key_parts, + key_info->flags & HA_USES_PARSER ? 
+ &key_info->parser->name : 0)); } { Key *key; @@ -3640,17 +4379,203 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (table->s->tmp_table) create_info->options|=HA_LEX_CREATE_TMP_TABLE; + set_table_default_charset(thd, create_info, db); + + { + /* + For some purposes we need prepared table structures and translated + key descriptions with proper default key name assignment. + + Unfortunately, mysql_prepare_table() modifies the field and key + lists. mysql_create_table() needs the unmodified lists. Hence, we + need to copy the lists and all their elements. The lists contain + pointers to the elements only. + + We cannot copy conditionally because the partition code always + needs prepared lists and compare_tables() needs them and is almost + always called. + */ + + /* Copy fields. */ + List_iterator<create_field> prep_field_it(create_list); + create_field *prep_field; + while ((prep_field= prep_field_it++)) + prepared_create_list.push_back(new create_field(*prep_field)); + + /* Copy keys and key parts. */ + List_iterator<Key> prep_key_it(key_list); + Key *prep_key; + while ((prep_key= prep_key_it++)) + { + List<key_part_spec> prep_columns; + List_iterator<key_part_spec> prep_col_it(prep_key->columns); + key_part_spec *prep_col; + + while ((prep_col= prep_col_it++)) + prep_columns.push_back(new key_part_spec(*prep_col)); + prepared_key_list.push_back(new Key(prep_key->type, prep_key->name, + prep_key->algorithm, + prep_key->generated, prep_columns, + prep_key->parser_name)); + } + + /* Create the prepared information. */ + if (mysql_prepare_table(thd, create_info, &prepared_create_list, + &prepared_key_list, + (table->s->tmp_table != NO_TMP_TABLE), &db_options, + table->file, &key_info_buffer, &key_count, 0)) + goto err; + } + + if (thd->variables.old_alter_table + || (table->s->db_type != create_info->db_type) +#ifdef WITH_PARTITION_STORAGE_ENGINE + || partition_changed +#endif + ) + need_copy_table= 1; + else + { + /* Try to optimize ALTER TABLE. Allocate result buffers. */ + if (! (index_drop_buffer= + (uint*) thd->alloc(sizeof(uint) * table->s->keys)) || + ! (index_add_buffer= + (uint*) thd->alloc(sizeof(uint) * prepared_key_list.elements))) + goto err; + /* Check how much the tables differ. */ + need_copy_table= compare_tables(table, &prepared_create_list, + key_info_buffer, key_count, + create_info, alter_info, order_num, + index_drop_buffer, &index_drop_count, + index_add_buffer, &index_add_count); + } + + /* + If there are index changes only, try to do them online. "Index + changes only" means also that the handler for the table does not + change. The table is open and locked. The handler can be accessed. + */ + if (need_copy_table == ALTER_TABLE_INDEX_CHANGED) + { + int pk_changed= 0; + ulong alter_flags= 0; + ulong needed_online_flags= 0; + ulong needed_fast_flags= 0; + KEY *key; + uint *idx_p; + uint *idx_end_p; + + if (table->s->db_type->alter_table_flags) + alter_flags= table->s->db_type->alter_table_flags(alter_info->flags); + DBUG_PRINT("info", ("alter_flags: %lu", alter_flags)); + /* Check dropped indexes. */ + for (idx_p= index_drop_buffer, idx_end_p= idx_p + index_drop_count; + idx_p < idx_end_p; + idx_p++) + { + key= table->key_info + *idx_p; + DBUG_PRINT("info", ("index dropped: '%s'", key->name)); + if (key->flags & HA_NOSAME) + { + /* Unique key. Check for "PRIMARY". */ + if (! my_strcasecmp(system_charset_info, + key->name, primary_key_name)) + { + /* Primary key. 
*/ + needed_online_flags|= HA_ONLINE_DROP_PK_INDEX; + needed_fast_flags|= HA_ONLINE_DROP_PK_INDEX_NO_WRITES; + pk_changed++; + } + else + { + /* Non-primary unique key. */ + needed_online_flags|= HA_ONLINE_DROP_UNIQUE_INDEX; + needed_fast_flags|= HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES; + } + } + else + { + /* Non-unique key. */ + needed_online_flags|= HA_ONLINE_DROP_INDEX; + needed_fast_flags|= HA_ONLINE_DROP_INDEX_NO_WRITES; + } + } + + /* Check added indexes. */ + for (idx_p= index_add_buffer, idx_end_p= idx_p + index_add_count; + idx_p < idx_end_p; + idx_p++) + { + key= key_info_buffer + *idx_p; + DBUG_PRINT("info", ("index added: '%s'", key->name)); + if (key->flags & HA_NOSAME) + { + /* Unique key. Check for "PRIMARY". */ + if (! my_strcasecmp(system_charset_info, + key->name, primary_key_name)) + { + /* Primary key. */ + needed_online_flags|= HA_ONLINE_ADD_PK_INDEX; + needed_fast_flags|= HA_ONLINE_ADD_PK_INDEX_NO_WRITES; + pk_changed++; + } + else + { + /* Non-primary unique key. */ + needed_online_flags|= HA_ONLINE_ADD_UNIQUE_INDEX; + needed_fast_flags|= HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES; + } + } + else + { + /* Non-unique key. */ + needed_online_flags|= HA_ONLINE_ADD_INDEX; + needed_fast_flags|= HA_ONLINE_ADD_INDEX_NO_WRITES; + } + } + + /* + Online or fast add/drop index is possible only if + the primary key is not added and dropped in the same statement. + Otherwise we have to recreate the table. + need_copy_table is non-zero at this place. + */ + if ( pk_changed < 2 ) + { + if ((alter_flags & needed_online_flags) == needed_online_flags) + { + /* All required online flags are present. */ + need_copy_table= 0; + need_lock_for_indexes= FALSE; + } + else if ((alter_flags & needed_fast_flags) == needed_fast_flags) + { + /* All required fast flags are present. */ + need_copy_table= 0; + } + } + DBUG_PRINT("info", ("need_copy_table: %u need_lock: %d", + need_copy_table, need_lock_for_indexes)); + } + /* better have a negative test here, instead of positive, like alter_info->flags & ALTER_ADD_COLUMN|ALTER_ADD_INDEX|... so that ALTER TABLE won't break when somebody will add new flag */ - need_copy_table= (alter_info->flags & - ~(ALTER_CHANGE_COLUMN_DEFAULT|ALTER_OPTIONS) || - (create_info->used_fields & - ~(HA_CREATE_USED_COMMENT|HA_CREATE_USED_PASSWORD)) || - table->s->tmp_table); - create_info->frm_only= !need_copy_table; + if (!need_copy_table) + create_info->frm_only= 1; + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (fast_alter_partition) + { + DBUG_RETURN(fast_alter_partition_table(thd, table, alter_info, + create_info, table_list, + &create_list, &key_list, + db, table_name, + fast_alter_partition)); + } +#endif /* Handling of symlinked tables: @@ -3676,7 +4601,6 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, Copy data. Remove old table and symlinks. */ - if (!strcmp(db, new_db)) // Ignore symlink if db changed { if (create_info->index_file_name) @@ -3699,15 +4623,19 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else create_info->data_file_name=create_info->index_file_name=0; - /* We don't log the statement, it will be logged later. */ - { - tmp_disable_binlog(thd); - error= mysql_create_table(thd, new_db, tmp_name, - create_info,create_list,key_list,1,0); - reenable_binlog(thd); - if (error) - DBUG_RETURN(error); - } + /* + Create a table with a temporary name. + With create_info->frm_only == 1 this creates a .frm file only. + We don't log the statement, it will be logged later.
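The capability check in the hunk above reduces to plain bitmask arithmetic: collect the bits the requested index changes need, then compare against what the handler advertises. A hypothetical condensation (not the server's signature; need_copy_table is modeled as a bool here):

#include <cassert>

typedef unsigned long flags_t;

static bool index_change_needs_copy(flags_t handler_flags,
                                    flags_t needed_online,
                                    flags_t needed_fast,
                                    int pk_changed,
                                    bool *need_lock_for_indexes)
{
  *need_lock_for_indexes = true;
  /* Adding and dropping the primary key in one statement always
     forces a table rebuild. */
  if (pk_changed >= 2)
    return true;
  if ((handler_flags & needed_online) == needed_online)
  {
    *need_lock_for_indexes = false;   /* fully online, writes allowed */
    return false;
  }
  if ((handler_flags & needed_fast) == needed_fast)
    return false;                     /* fast, but blocks writes */
  return true;
}

int main()
{
  bool lock;
  assert(!index_change_needs_copy(0x3, 0x1, 0x2, 1, &lock) && !lock);
  assert(index_change_needs_copy(0x0, 0x1, 0x2, 0, &lock));
  return 0;
}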
+ */ + tmp_disable_binlog(thd); + error= mysql_create_table(thd, new_db, tmp_name, + create_info,create_list,key_list,1,0); + reenable_binlog(thd); + if (error) + DBUG_RETURN(error); + + /* Open the table if we need to copy the data. */ if (need_copy_table) { if (table->s->tmp_table) @@ -3716,15 +4644,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, bzero((void*) &tbl, sizeof(tbl)); tbl.db= new_db; tbl.table_name= tbl.alias= tmp_name; + /* Table is in thd->temporary_tables */ new_table= open_table(thd, &tbl, thd->mem_root, (bool*) 0, MYSQL_LOCK_IGNORE_FLUSH); } else { char path[FN_REFLEN]; - my_snprintf(path, sizeof(path), "%s/%s/%s", mysql_data_home, - new_db, tmp_name); - fn_format(path,path,"","",4); + /* table is a normal table: Create temporary table in same directory */ + build_table_filename(path, sizeof(path), new_db, tmp_name, ""); new_table=open_temporary_table(thd, path, new_db, tmp_name,0); } if (!new_table) @@ -3734,14 +4662,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, } } - /* We don't want update TIMESTAMP fields during ALTER TABLE. */ + /* Copy the data if necessary. */ thd->count_cuted_fields= CHECK_FIELD_WARN; // calc cuted fields thd->cuted_fields=0L; thd->proc_info="copy to tmp table"; next_insert_id=thd->next_insert_id; // Remember for logging copied=deleted=0; - if (new_table && !new_table->s->is_view) + if (new_table && !(new_table->file->table_flags() & HA_NO_COPY_ON_ALTER)) { + /* We don't want update TIMESTAMP fields during ALTER TABLE. */ new_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; new_table->next_number_field=new_table->found_next_number_field; error=copy_data_between_tables(table,new_table,create_list, @@ -3751,7 +4680,167 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, thd->last_insert_id=next_insert_id; // Needed for correct log thd->count_cuted_fields= CHECK_FIELD_IGNORE; - if (table->s->tmp_table) + /* If we did not need to copy, we might still need to add/drop indexes. */ + if (! new_table) + { + uint *key_numbers; + uint *keyno_p; + KEY *key_info; + KEY *key; + uint *idx_p; + uint *idx_end_p; + KEY_PART_INFO *key_part; + KEY_PART_INFO *part_end; + DBUG_PRINT("info", ("No new_table, checking add/drop index")); + + if (index_add_count) + { +#ifdef XXX_TO_BE_DONE_LATER_BY_WL3020_AND_WL1892 + if (! need_lock_for_indexes) + { + /* Downgrade the write lock. */ + mysql_lock_downgrade_write(thd, table, TL_WRITE_ALLOW_WRITE); + } + + /* Create a new .frm file for crash recovery. */ + /* TODO: Must set INDEX_TO_BE_ADDED flags in the frm file. */ + VOID(pthread_mutex_lock(&LOCK_open)); + error= (mysql_create_frm(thd, reg_path, db, table_name, + create_info, prepared_create_list, key_count, + key_info_buffer, table->file) || + table->file->create_handler_files(reg_path)); + VOID(pthread_mutex_unlock(&LOCK_open)); + if (error) + goto err; +#endif + + /* The add_index() method takes an array of KEY structs. */ + key_info= (KEY*) thd->alloc(sizeof(KEY) * index_add_count); + key= key_info; + for (idx_p= index_add_buffer, idx_end_p= idx_p + index_add_count; + idx_p < idx_end_p; + idx_p++, key++) + { + /* Copy the KEY struct. */ + *key= key_info_buffer[*idx_p]; + /* Fix the key parts. */ + part_end= key->key_part + key->key_parts; + for (key_part= key->key_part; key_part < part_end; key_part++) + key_part->field= table->field[key_part->fieldnr]; + } + /* Add the indexes. */ + if ((error= table->file->add_index(table, key_info, index_add_count))) + { + /* + Exchange the key_info for the error message. 
If we exchange + key number by key name in the message later, we need correct info. + */ + KEY *save_key_info= table->key_info; + table->key_info= key_info; + table->file->print_error(error, MYF(0)); + table->key_info= save_key_info; + goto err; + } + } + /*end of if (index_add_count)*/ + + if (index_drop_count) + { +#ifdef XXX_TO_BE_DONE_LATER_BY_WL3020_AND_WL1892 + /* Create a new .frm file for crash recovery. */ + /* TODO: Must set INDEX_IS_ADDED in the frm file. */ + /* TODO: Must set INDEX_TO_BE_DROPPED in the frm file. */ + VOID(pthread_mutex_lock(&LOCK_open)); + error= (mysql_create_frm(thd, reg_path, db, table_name, + create_info, prepared_create_list, key_count, + key_info_buffer, table->file) || + table->file->create_handler_files(reg_path)); + VOID(pthread_mutex_unlock(&LOCK_open)); + if (error) + goto err; + + if (! need_lock_for_indexes) + { + LOCK_PARAM_TYPE lpt; + + lpt.thd= thd; + lpt.table= table; + lpt.db= db; + lpt.table_name= table_name; + lpt.create_info= create_info; + lpt.create_list= &create_list; + lpt.key_count= key_count; + lpt.key_info_buffer= key_info_buffer; + abort_and_upgrade_lock(lpt); + } +#endif + + /* The prepare_drop_index() method takes an array of key numbers. */ + key_numbers= (uint*) thd->alloc(sizeof(uint) * index_drop_count); + keyno_p= key_numbers; + /* Get the number of each key. */ + for (idx_p= index_drop_buffer, idx_end_p= idx_p + index_drop_count; + idx_p < idx_end_p; + idx_p++, keyno_p++) + *keyno_p= *idx_p; + /* + Tell the handler to prepare for drop indexes. + This re-numbers the indexes to get rid of gaps. + */ + if ((error= table->file->prepare_drop_index(table, key_numbers, + index_drop_count))) + { + table->file->print_error(error, MYF(0)); + goto err; + } + +#ifdef XXX_TO_BE_DONE_LATER_BY_WL3020 + if (! need_lock_for_indexes) + { + /* Downgrade the lock again. */ + if (table->reginfo.lock_type == TL_WRITE_ALLOW_READ) + { + LOCK_PARAM_TYPE lpt; + + lpt.thd= thd; + lpt.table= table; + lpt.db= db; + lpt.table_name= table_name; + lpt.create_info= create_info; + lpt.create_list= &create_list; + lpt.key_count= key_count; + lpt.key_info_buffer= key_info_buffer; + close_open_tables_and_downgrade(lpt); + } + } +#endif + + /* Tell the handler to finally drop the indexes. */ + if ((error= table->file->final_drop_index(table))) + { + table->file->print_error(error, MYF(0)); + goto err; + } + } + /*end of if (index_drop_count)*/ + + if (index_add_count || index_drop_count) + { + /* + The final .frm file is already created as a temporary file + and will be renamed to the original table name later. + */ + + /* Need to commit before a table is unlocked (NDB requirement). */ + DBUG_PRINT("info", ("Committing after add/drop index")); + if (ha_commit_stmt(thd) || ha_commit(thd)) + goto err; + committed= 1; + } + } + /*end of if (! 
new_table) for add/drop index*/ + + if (table->s->tmp_table != NO_TMP_TABLE) { /* We changed a temporary table */ if (error) @@ -3760,7 +4849,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, The following function call will free the new_table pointer, in close_temporary_table(), so we can safely directly jump to err */ - close_temporary_table(thd,new_db,tmp_name); + if (new_table) + close_temporary_table(thd, new_table, 1, 1); + else + VOID(quick_rm_table(new_db_type,new_db,tmp_name)); goto err; } /* Close lock if this is a transactional table */ @@ -3770,26 +4862,29 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, thd->lock=0; } /* Remove link to old table and rename the new one */ - close_temporary_table(thd, table->s->db, table_name); + close_temporary_table(thd, table, 1, 1); /* Should pass the 'new_name' as we store table name in the cache */ if (rename_temporary_table(thd, new_table, new_db, new_name)) { // Fatal error - close_temporary_table(thd,new_db,tmp_name); - my_free((gptr) new_table,MYF(0)); + if (new_table) + { + close_temporary_table(thd, new_table, 1, 1); + my_free((gptr) new_table,MYF(0)); + } + else + VOID(quick_rm_table(new_db_type,new_db,tmp_name)); goto err; } - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } + /* We don't replicate alter table statement on temporary tables */ + if (!thd->current_stmt_binlog_row_based) + write_bin_log(thd, TRUE, thd->query, thd->query_length); goto end_temporary; } if (new_table) { - intern_close_table(new_table); /* close temporary table */ + /* close temporary table that will be the new table */ + intern_close_table(new_table); my_free((gptr) new_table,MYF(0)); } VOID(pthread_mutex_lock(&LOCK_open)); @@ -3832,6 +4927,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, close the original table at before doing the rename */ table_name=thd->strdup(table_name); // must be saved + table->s->version= 0; // Force removal of table def close_cached_table(thd, table); table=0; // Marker that table is closed } @@ -3843,7 +4939,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, error=0; if (!need_copy_table) - new_db_type=old_db_type=DB_TYPE_UNKNOWN; // this type cannot happen in regular ALTER + new_db_type=old_db_type= NULL; // this type cannot happen in regular ALTER if (mysql_rename_table(old_db_type,db,table_name,db,old_name)) { error=1; @@ -3868,18 +4964,40 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, closing the locked table. */ if (table) + { + table->s->version= 0; // Force removal of table def close_cached_table(thd,table); + } VOID(pthread_mutex_unlock(&LOCK_open)); goto err; } + if (! need_copy_table) + { + if (! table) + { + VOID(pthread_mutex_unlock(&LOCK_open)); + if (! (table= open_ltable(thd, table_list, TL_WRITE_ALLOW_READ))) + goto err; + VOID(pthread_mutex_lock(&LOCK_open)); + } + /* Tell the handler that a new frm file is in place. */ + if (table->file->create_handler_files(reg_path)) + { + VOID(pthread_mutex_unlock(&LOCK_open)); + goto err; + } + } if (thd->lock || new_name != table_name) // True if WIN32 { /* - Not table locking or alter table with rename - free locks and remove old table + Not table locking or alter table with rename. 
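For reference, the drop path a few hunks above follows a two-phase handler protocol: prepare_drop_index() renumbers the remaining indexes and marks the victims, then final_drop_index() physically removes them once the new .frm is in place. A stub sketch of the call sequence only; the stub class and its bodies are invented, while the method names follow the patch:

#include <cstdio>

struct StubHandler
{
  int prepare_drop_index(const unsigned *keys, unsigned count)
  {
    /* Phase 1: renumber remaining indexes, remember the victims. */
    std::printf("preparing to drop %u index(es), first=%u\n", count, keys[0]);
    return 0;
  }
  int final_drop_index()
  {
    /* Phase 2: physically remove the index data. */
    std::printf("indexes dropped\n");
    return 0;
  }
};

int main()
{
  StubHandler h;
  unsigned victims[] = { 0, 2 };
  /* On a non-zero return the server calls print_error() and aborts
     the ALTER, as in the hunk above. */
  if (h.prepare_drop_index(victims, 2) || h.final_drop_index())
    return 1;
  return 0;
}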
+ Free locks and remove old table */ if (table) + { + table->s->version= 0; // Force removal of table def close_cached_table(thd,table); + } VOID(quick_rm_table(old_db_type,db,old_name)); } else @@ -3895,39 +5013,51 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, /* Mark in-use copies old */ remove_table_from_cache(thd,db,table_name,RTFC_NO_FLAG); /* end threads waiting on lock */ - mysql_lock_abort(thd,table); + mysql_lock_abort(thd,table, TRUE); } VOID(quick_rm_table(old_db_type,db,old_name)); if (close_data_tables(thd,db,table_name) || reopen_tables(thd,1,0)) { // This shouldn't happen if (table) + { + table->s->version= 0; // Force removal of table def close_cached_table(thd,table); // Remove lock for table + } VOID(pthread_mutex_unlock(&LOCK_open)); goto err; } } - /* The ALTER TABLE is always in its own transaction */ - error = ha_commit_stmt(thd); - if (ha_commit(thd)) - error=1; - if (error) + VOID(pthread_mutex_unlock(&LOCK_open)); + VOID(pthread_cond_broadcast(&COND_refresh)); + /* + The ALTER TABLE is always in its own transaction. + Commit must not be called while LOCK_open is locked. It could call + wait_if_global_read_lock(), which could create a deadlock if called + with LOCK_open. + */ + if (!committed) { - VOID(pthread_mutex_unlock(&LOCK_open)); - VOID(pthread_cond_broadcast(&COND_refresh)); - goto err; + error = ha_commit_stmt(thd); + if (ha_commit(thd)) + error=1; + if (error) + goto err; } thd->proc_info="end"; - if (mysql_bin_log.is_open()) - { - thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); - mysql_bin_log.write(&qinfo); - } - VOID(pthread_cond_broadcast(&COND_refresh)); - VOID(pthread_mutex_unlock(&LOCK_open)); -#ifdef HAVE_BERKELEY_DB - if (old_db_type == DB_TYPE_BERKELEY_DB) + + ha_binlog_log_query(thd, create_info->db_type, LOGCOM_ALTER_TABLE, + thd->query, thd->query_length, + db, table_name); + + DBUG_ASSERT(!(mysql_bin_log.is_open() && thd->current_stmt_binlog_row_based && + (create_info->options & HA_LEX_CREATE_TMP_TABLE))); + write_bin_log(thd, TRUE, thd->query, thd->query_length); + /* + TODO RONM: This problem needs to handled for Berkeley DB partitions + as well + */ + if (ha_check_storage_engine_flag(old_db_type,HTON_FLUSH_AFTER_RENAME)) { /* For the alter table to be properly flushed to the logs, we @@ -3935,7 +5065,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, shutdown. 
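
The hunk above replaces the hard-coded #ifdef HAVE_BERKELEY_DB / DB_TYPE_BERKELEY_DB test with a generic capability query: any engine can request the reopen-and-flush-after-rename treatment by setting HTON_FLUSH_AFTER_RENAME in its handlerton, and the same mechanism drives the HTON_TEMPORARY_NOT_SUPPORTED fallback in check_engine() further down. A minimal stand-alone sketch of that flag test; the struct layout and bit values are invented for illustration, not the server's real definitions:

    #include <cstdio>
    #include <cstdint>

    // Invented flag values; the real constants live in handler.h.
    static const uint32_t HTON_FLUSH_AFTER_RENAME=      1 << 0;
    static const uint32_t HTON_TEMPORARY_NOT_SUPPORTED= 1 << 1;

    // Sketch of a handlerton-style descriptor carrying capability bits.
    struct handlerton_sketch
    {
      const char *name;
      uint32_t flags;                    // set once by the storage engine
    };

    static bool ha_check_storage_engine_flag_sketch(const handlerton_sketch *hton,
                                                    uint32_t flag)
    {
      return (hton->flags & flag) != 0;  // generic test, no engine #ifdefs
    }

    int main()
    {
      handlerton_sketch bdb_like= { "bdb-like", HTON_FLUSH_AFTER_RENAME };
      if (ha_check_storage_engine_flag_sketch(&bdb_like, HTON_FLUSH_AFTER_RENAME))
        printf("%s: reopen table and flush logs after rename\n", bdb_like.name);
      if (!ha_check_storage_engine_flag_sketch(&bdb_like, HTON_TEMPORARY_NOT_SUPPORTED))
        printf("%s: TEMPORARY tables allowed\n", bdb_like.name);
      return 0;
    }

The design gain is that engine-specific behavior moves from compile-time #ifdefs in the SQL layer into per-engine data, which is what the pluggable-engine work in this change set needs.
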
*/ char path[FN_REFLEN]; - build_table_path(path, sizeof(path), new_db, table_name, ""); + build_table_filename(path, sizeof(path), new_db, table_name, ""); table=open_temporary_table(thd, path, new_db, tmp_name,0); if (table) { @@ -3943,11 +5073,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, my_free((char*) table, MYF(0)); } else - sql_print_warning("Could not open BDB table %s.%s after rename\n", + sql_print_warning("Could not open table %s.%s after rename\n", new_db,table_name); - (void) berkeley_flush_logs(); + ha_flush_logs(old_db_type); } -#endif table_list->table=0; // For query cache query_cache_invalidate3(thd, table_list, 0); @@ -3963,7 +5092,7 @@ end_temporary: err: DBUG_RETURN(TRUE); } - +/* mysql_alter_table */ static int copy_data_between_tables(TABLE *from,TABLE *to, @@ -4049,8 +5178,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, MYF(MY_FAE | MY_ZEROFILL)); bzero((char*) &tables,sizeof(tables)); tables.table= from; - tables.alias= tables.table_name= (char*) from->s->table_name; - tables.db= (char*) from->s->db; + tables.alias= tables.table_name= from->s->table_name.str; + tables.db= from->s->db.str; error=1; if (thd->lex->select_lex.setup_ref_array(thd, order_num) || @@ -4069,7 +5198,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, this function does not set field->query_id in the columns to the current query id */ - from->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + to->file->ha_set_all_bits_in_write_set(); + from->file->ha_retrieve_all_cols(); init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1,1); if (ignore || handle_duplicates == DUP_REPLACE) @@ -4097,7 +5227,7 @@ copy_data_between_tables(TABLE *from,TABLE *to, { copy_ptr->do_copy(copy_ptr); } - if ((error=to->file->write_row((byte*) to->record[0]))) + if ((error=to->file->ha_write_row((byte*) to->record[0]))) { if ((!ignore && handle_duplicates != DUP_REPLACE) || @@ -4170,11 +5300,11 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, lex->col_list.empty(); lex->alter_info.reset(); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type=DB_TYPE_DEFAULT; + create_info.db_type= (handlerton*) &default_hton; create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ - lex->alter_info.flags= ALTER_CHANGE_COLUMN; + lex->alter_info.flags= (ALTER_CHANGE_COLUMN | ALTER_RECREATE); DBUG_RETURN(mysql_alter_table(thd, NullS, NullS, &create_info, table_list, lex->create_list, lex->key_list, 0, (ORDER *) 0, @@ -4233,10 +5363,11 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) ha_checksum crc= 0; uchar null_mask=256 - (1 << t->s->last_null_bit_pos); - /* InnoDB must be told explicitly to retrieve all columns, because - this function does not set field->query_id in the columns to the - current query id */ - t->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + /* + Set all bits in read set and inform InnoDB that we are reading all + fields + */ + t->file->ha_retrieve_all_cols(); if (t->file->ha_rnd_init(1)) protocol->store_null(); @@ -4302,22 +5433,34 @@ bool mysql_checksum_table(THD *thd, TABLE_LIST *tables, HA_CHECK_OPT *check_opt) } static bool check_engine(THD *thd, const char *table_name, - enum db_type *new_engine) + HA_CREATE_INFO *create_info) { - enum db_type req_engine= *new_engine; + handlerton **new_engine= &create_info->db_type; + handlerton *req_engine= *new_engine; bool no_substitution= test(thd->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION); - if 
((*new_engine= - ha_checktype(thd, req_engine, no_substitution, 1)) == DB_TYPE_UNKNOWN) + if (!(*new_engine= ha_checktype(thd, ha_legacy_type(req_engine), + no_substitution, 1))) return TRUE; - if (req_engine != *new_engine) + if (req_engine != (handlerton*) &default_hton && req_engine != *new_engine) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_USING_OTHER_HANDLER, ER(ER_WARN_USING_OTHER_HANDLER), - ha_get_storage_engine(*new_engine), + ha_resolve_storage_engine_name(*new_engine), table_name); } + if (create_info->options & HA_LEX_CREATE_TMP_TABLE && + ha_check_storage_engine_flag(*new_engine, HTON_TEMPORARY_NOT_SUPPORTED)) + { + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), (*new_engine)->name, "TEMPORARY"); + *new_engine= 0; + return TRUE; + } + *new_engine= &myisam_hton; + } return FALSE; } diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc new file mode 100644 index 00000000000..954d65ea44e --- /dev/null +++ b/sql/sql_tablespace.cc @@ -0,0 +1,56 @@ +/* Copyright (C) 2000-2004 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* drop and alter of tablespaces */ + +#include "mysql_priv.h" + +int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info) +{ + int error= HA_ADMIN_NOT_IMPLEMENTED; + handlerton *hton; + + DBUG_ENTER("mysql_alter_tablespace"); + /* + If the user haven't defined an engine, this will fallback to using the + default storage engine. + */ + hton= ha_resolve_by_legacy_type(thd, ts_info->storage_engine); + + if (hton->state == SHOW_OPTION_YES && + hton->alter_tablespace && (error= hton->alter_tablespace(thd, ts_info))) + { + if (error == HA_ADMIN_NOT_IMPLEMENTED) + { + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), ""); + } + else if (error == 1) + { + DBUG_RETURN(1); + } + else + { + my_error(error, MYF(0)); + } + DBUG_RETURN(error); + } + if (mysql_bin_log.is_open()) + { + thd->binlog_query(THD::STMT_QUERY_TYPE, + thd->query, thd->query_length, FALSE, TRUE); + } + DBUG_RETURN(FALSE); +} diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 975cc19ea3f..bf86630d28c 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -79,7 +79,7 @@ void print_cached_tables(void) { TABLE *entry=(TABLE*) hash_element(&open_cache,idx); printf("%-14.14s %-32s%6ld%8ld%10ld%6d %s\n", - entry->s->db, entry->s->table_name, entry->s->version, + entry->s->db.str, entry->s->table_name.str, entry->s->version, entry->in_use ? entry->in_use->thread_id : 0L, entry->in_use ? entry->in_use->dbug_thread_id : 0L, entry->db_stat ? 1 : 0, entry->in_use ? 
lock_descriptions[(int)entry->reginfo.lock_type] : "Not in use"); @@ -261,7 +261,7 @@ print_plan(JOIN* join, double read_time, double record_count, pos = join->positions[i]; table= pos.table->table; if (table) - fputs(table->s->table_name, DBUG_FILE); + fputs(table->s->table_name.str, DBUG_FILE); fputc(' ', DBUG_FILE); } fputc('\n', DBUG_FILE); @@ -278,7 +278,7 @@ print_plan(JOIN* join, double read_time, double record_count, pos= join->best_positions[i]; table= pos.table->table; if (table) - fputs(table->s->table_name, DBUG_FILE); + fputs(table->s->table_name.str, DBUG_FILE); fputc(' ', DBUG_FILE); } } @@ -289,7 +289,7 @@ print_plan(JOIN* join, double read_time, double record_count, for (plan_nodes= join->best_ref ; *plan_nodes ; plan_nodes++) { join_table= (*plan_nodes); - fputs(join_table->table->s->table_name, DBUG_FILE); + fputs(join_table->table->s->table_name.str, DBUG_FILE); fprintf(DBUG_FILE, "(%lu,%lu,%lu)", (ulong) join_table->found_records, (ulong) join_table->records, @@ -336,8 +336,8 @@ static void push_locks_into_array(DYNAMIC_ARRAY *ar, THR_LOCK_DATA *data, { TABLE_LOCK_INFO table_lock_info; table_lock_info.thread_id= table->in_use->thread_id; - memcpy(table_lock_info.table_name, table->s->table_cache_key, - table->s->key_length); + memcpy(table_lock_info.table_name, table->s->table_cache_key.str, + table->s->table_cache_key.length); table_lock_info.table_name[strlen(table_lock_info.table_name)]='.'; table_lock_info.waiting=wait; table_lock_info.lock_text=text; @@ -484,7 +484,7 @@ Open tables: %10lu\n\ Open files: %10lu\n\ Open streams: %10lu\n", tmp.opened_tables, - (ulong) cached_tables(), + (ulong) cached_open_tables(), (ulong) my_file_opened, (ulong) my_stream_opened); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 4c3ae5c032d..e33dc52df8f 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -176,20 +176,26 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) DBUG_ASSERT(tables->next_global == 0); /* - TODO: We should check if user has TRIGGER privilege for table here. - Now we just require SUPER privilege for creating/dropping because - we don't have proper privilege checking for triggers in place yet. + Check that the user has TRIGGER privilege on the subject table. */ - if (check_global_access(thd, SUPER_ACL)) - DBUG_RETURN(TRUE); + { + bool err_status; + TABLE_LIST **save_query_tables_own_last= thd->lex->query_tables_own_last; + thd->lex->query_tables_own_last= 0; + + err_status= check_table_access(thd, TRIGGER_ACL, tables, 0); + + thd->lex->query_tables_own_last= save_query_tables_own_last; + + if (err_status) + DBUG_RETURN(TRUE); + } /* There is no DETERMINISTIC clause for triggers, so can't check it. But a trigger can in theory be used to do nasty things (if it supported - DROP for example) so we do the check for privileges. For now there is - already a stronger test right above; but when this stronger test will - be removed, the test below will hold. Because triggers have the same - nature as functions regarding binlogging: their body is implicitely + DROP for example) so we do the check for privileges. Triggers have the + same nature as functions regarding binlogging: their body is implicitely binlogged, so they share the same danger, so trust_function_creators applies to them too. */ @@ -201,7 +207,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) } /* We do not allow creation of triggers on temporary tables. 
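
The SUPER_ACL shortcut is gone above: CREATE/DROP TRIGGER now runs check_table_access() for TRIGGER_ACL, temporarily clearing query_tables_own_last so the check covers the statement's whole table list before restoring it. A toy sketch of that save/clear/restore pattern, using stand-in types rather than the real LEX and TABLE_LIST:

    #include <cstdio>

    struct TableListStub { const char *name; };
    struct LexStub { TableListStub **query_tables_own_last; };

    // Stand-in for check_table_access(); false means access granted.
    static bool check_table_access_stub(TableListStub *tables)
    {
      printf("checking TRIGGER privilege on %s\n", tables->name);
      return false;
    }

    static bool trigger_access_check(LexStub *lex, TableListStub *tables)
    {
      TableListStub **saved= lex->query_tables_own_last;
      lex->query_tables_own_last= 0;       // widen the checked list
      bool err= check_table_access_stub(tables);
      lex->query_tables_own_last= saved;   // restore unconditionally
      return err;
    }

    int main()
    {
      TableListStub t= { "db.t1" };
      TableListStub *own_last= &t;
      LexStub lex= { &own_last };
      return trigger_access_check(&lex, &t) ? 1 : 0;
    }
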
*/ - if (create && find_temporary_table(thd, tables->db, tables->table_name)) + if (create && find_temporary_table(thd, tables)) { my_error(ER_TRG_ON_VIEW_OR_TEMP_TABLE, MYF(0), tables->alias); DBUG_RETURN(TRUE); @@ -311,9 +317,8 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, { LEX *lex= thd->lex; TABLE *table= tables->table; - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN], trigname_buff[FN_REFLEN], - trigname_path[FN_REFLEN]; - LEX_STRING dir, file, trigname_file; + char file_buff[FN_REFLEN], trigname_buff[FN_REFLEN]; + LEX_STRING file, trigname_file; LEX_STRING *trg_def, *name; ulonglong *trg_sql_mode; char trg_definer_holder[USER_HOST_BUFF_SIZE]; @@ -323,7 +328,8 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, /* Trigger must be in the same schema as target table. */ - if (my_strcasecmp(table_alias_charset, table->s->db, lex->spname->m_db.str)) + if (my_strcasecmp(table_alias_charset, table->s->db.str, + lex->spname->m_db.str)) { my_error(ER_TRG_IN_WRONG_SCHEMA, MYF(0)); return 1; @@ -396,20 +402,18 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, sql_create_definition_file() files handles renaming and backup of older versions */ - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", tables->db, "/", NullS); - dir.length= unpack_filename(dir_buff, dir_buff); - dir.str= dir_buff; - file.length= strxnmov(file_buff, FN_REFLEN, tables->table_name, - triggers_file_ext, NullS) - file_buff; + file.length= build_table_filename(file_buff, FN_REFLEN-1, + tables->db, tables->table_name, + triggers_file_ext); file.str= file_buff; - trigname_file.length= strxnmov(trigname_buff, FN_REFLEN, - lex->spname->m_name.str, - trigname_file_ext, NullS) - trigname_buff; + trigname_file.length= build_table_filename(trigname_buff, FN_REFLEN-1, + tables->db, + lex->spname->m_name.str, + trigname_file_ext); trigname_file.str= trigname_buff; - strxnmov(trigname_path, FN_REFLEN, dir_buff, trigname_buff, NullS); /* Use the filesystem to enforce trigger namespace constraints. 
*/ - if (!access(trigname_path, F_OK)) + if (!access(trigname_buff, F_OK)) { my_error(ER_TRG_ALREADY_EXISTS, MYF(0)); return 1; @@ -418,7 +422,7 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, trigname.trigger_table.str= tables->table_name; trigname.trigger_table.length= tables->table_name_length; - if (sql_create_definition_file(&dir, &trigname_file, &trigname_file_type, + if (sql_create_definition_file(NULL, &trigname_file, &trigname_file_type, (gptr)&trigname, trigname_file_parameters, 0)) return 1; @@ -465,12 +469,12 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST *tables, trg_definer->length= strxmov(trg_definer->str, definer_user->str, "@", definer_host->str, NullS) - trg_definer->str; - if (!sql_create_definition_file(&dir, &file, &triggers_file_type, + if (!sql_create_definition_file(NULL, &file, &triggers_file_type, (gptr)this, triggers_file_parameters, 0)) return 0; err_with_cleanup: - my_delete(trigname_path, MYF(MY_WME)); + my_delete(trigname_buff, MYF(MY_WME)); return 1; } @@ -493,9 +497,7 @@ err_with_cleanup: static bool rm_trigger_file(char *path, const char *db, const char *table_name) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", db, "/", table_name, - triggers_file_ext, NullS); - unpack_filename(path, path); + build_table_filename(path, FN_REFLEN-1, db, table_name, triggers_file_ext); return my_delete(path, MYF(MY_WME)); } @@ -518,9 +520,7 @@ static bool rm_trigger_file(char *path, const char *db, static bool rm_trigname_file(char *path, const char *db, const char *trigger_name) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", db, "/", trigger_name, - trigname_file_ext, NullS); - unpack_filename(path, path); + build_table_filename(path, FN_REFLEN-1, db, trigger_name, trigname_file_ext); return my_delete(path, MYF(MY_WME)); } @@ -542,18 +542,15 @@ static bool rm_trigname_file(char *path, const char *db, static bool save_trigger_file(Table_triggers_list *triggers, const char *db, const char *table_name) { - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN]; - LEX_STRING dir, file; - - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", db, "/", NullS); - dir.length= unpack_filename(dir_buff, dir_buff); - dir.str= dir_buff; - file.length= strxnmov(file_buff, FN_REFLEN, table_name, triggers_file_ext, - NullS) - file_buff; - file.str= file_buff; + char file_buff[FN_REFLEN]; + LEX_STRING file; - return sql_create_definition_file(&dir, &file, &triggers_file_type, - (gptr)triggers, triggers_file_parameters, 0); + file.length= build_table_filename(file_buff, FN_REFLEN-1, db, table_name, + triggers_file_ext); + file.str= file_buff; + return sql_create_definition_file(NULL, &file, &triggers_file_type, + (gptr)triggers, triggers_file_parameters, + 0); } @@ -667,7 +664,7 @@ bool Table_triggers_list::prepare_record1_accessors(TABLE *table) */ if (!(*old_fld= (*fld)->new_field(&table->mem_root, table))) return 1; - (*old_fld)->move_field((my_ptrdiff_t)(table->record[1] - + (*old_fld)->move_field_offset((my_ptrdiff_t)(table->record[1] - table->record[0])); } *old_fld= 0; @@ -722,9 +719,8 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, DBUG_ENTER("Table_triggers_list::check_n_load"); - strxnmov(path_buff, FN_REFLEN, mysql_data_home, "/", db, "/", table_name, - triggers_file_ext, NullS); - path.length= unpack_filename(path_buff, path_buff); + path.length= build_table_filename(path_buff, FN_REFLEN-1, + db, table_name, triggers_file_ext); path.str= path_buff; // QQ: should we analyze errno somehow ? 
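
Several hunks in this file (and in sql_view.cc below) swap hand-rolled strxnmov()/unpack_filename() pairs for a single build_table_filename() call that composes "<datadir>/<db>/<name><ext>" and returns the length. A simplified model of that helper; the real function also encodes filesystem-unsafe characters, and the data directory here is a made-up constant:

    #include <cstdio>
    #include <cstring>

    // Simplified sketch of build_table_filename(); returns 0 on truncation.
    static size_t build_table_filename_sketch(char *buff, size_t bufflen,
                                              const char *db, const char *name,
                                              const char *ext)
    {
      int n= snprintf(buff, bufflen, "%s/%s/%s%s",
                      "/var/lib/mysql", db, name, ext);  // invented datadir
      return (n < 0 || (size_t) n >= bufflen) ? 0 : (size_t) n;
    }

    int main()
    {
      char path[512];
      size_t len= build_table_filename_sketch(path, sizeof(path),
                                              "test", "t1", ".TRG");
      printf("%zu: %s\n", len, path);      // 26: /var/lib/mysql/test/t1.TRG
      return 0;
    }

Centralizing the composition means every caller gets the same length handling and (in the real helper) the same name encoding, instead of each site repeating the mysql_data_home concatenation.
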
@@ -905,7 +901,7 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, schema. */ - lex.sphead->set_definer("", 0); + lex.sphead->set_definer((char*) "", 0); /* Triggers without definer information are executed under the @@ -1071,9 +1067,9 @@ static TABLE_LIST *add_table_for_trigger(THD *thd, sp_name *trig) struct st_trigname trigname; DBUG_ENTER("add_table_for_trigger"); - strxnmov(path_buff, FN_REFLEN, mysql_data_home, "/", trig->m_db.str, "/", - trig->m_name.str, trigname_file_ext, NullS); - path.length= unpack_filename(path_buff, path_buff); + path.length= build_table_filename(path_buff, FN_REFLEN-1, + trig->m_db.str, trig->m_name.str, + trigname_file_ext); path.str= path_buff; if (access(path_buff, F_OK)) @@ -1269,26 +1265,24 @@ Table_triggers_list::change_table_name_in_trignames(const char *db_name, LEX_STRING *new_table_name, LEX_STRING *stopper) { - char dir_buff[FN_REFLEN], trigname_buff[FN_REFLEN]; + char trigname_buff[FN_REFLEN]; struct st_trigname trigname; - LEX_STRING dir, trigname_file; + LEX_STRING trigname_file; LEX_STRING *trigger; List_iterator_fast<LEX_STRING> it_name(names_list); - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", db_name, "/", NullS); - dir.length= unpack_filename(dir_buff, dir_buff); - dir.str= dir_buff; - while ((trigger= it_name++) != stopper) { - trigname_file.length= strxnmov(trigname_buff, FN_REFLEN, trigger->str, - trigname_file_ext, NullS) - trigname_buff; + trigname_file.length= build_table_filename(trigname_buff, FN_REFLEN-1, + db_name, trigger->str, + trigname_file_ext); trigname_file.str= trigname_buff; trigname.trigger_table= *new_table_name; - if (sql_create_definition_file(&dir, &trigname_file, &trigname_file_type, - (gptr)&trigname, trigname_file_parameters, 0)) + if (sql_create_definition_file(NULL, &trigname_file, &trigname_file_type, + (gptr)&trigname, trigname_file_parameters, + 0)) return trigger; } @@ -1415,16 +1409,6 @@ bool Table_triggers_list::process_triggers(THD *thd, trg_event_type event, return TRUE; /* - NOTE: TRIGGER_ACL should be used below. - */ - - if (check_global_access(thd, SUPER_ACL)) - { - sp_restore_security_context(thd, save_ctx); - return TRUE; - } - - /* Fetch information about table-level privileges to GRANT_INFO structure for subject table. Check of privileges that will use it and information about column-level privileges will happen in Item_trigger_field::fix_fields(). @@ -1432,7 +1416,22 @@ bool Table_triggers_list::process_triggers(THD *thd, trg_event_type event, fill_effective_table_privileges(thd, &subject_table_grants[event][time_type], - table->s->db, table->s->table_name); + table->s->db.str, table->s->table_name.str); + + /* Check that the definer has TRIGGER privilege on the subject table. 
*/ + + if (!(subject_table_grants[event][time_type].privilege & TRIGGER_ACL)) + { + char priv_desc[128]; + get_privilege_desc(priv_desc, sizeof(priv_desc), TRIGGER_ACL); + + my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), priv_desc, + thd->security_ctx->priv_user, thd->security_ctx->host_or_ip, + table->s->table_name.str); + + sp_restore_security_context(thd, save_ctx); + return TRUE; + } #endif // NO_EMBEDDED_ACCESS_CHECKS thd->reset_sub_statement_state(&statement_state, SUB_STMT_TRIGGER); diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index 3f8a4e346f9..69c5388f776 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -38,36 +38,10 @@ #ifdef HAVE_DLOPEN extern "C" { -#if defined(__WIN__) - void* dlsym(void* lib,const char* name) - { - return GetProcAddress((HMODULE)lib,name); - } - void* dlopen(const char* libname,int unused) - { - return LoadLibraryEx(libname,NULL,0); - } - void dlclose(void* lib) - { - FreeLibrary((HMODULE)lib); - } - -#elif !defined(OS2) -#include <dlfcn.h> -#endif - #include <stdarg.h> #include <hash.h> } -#ifndef RTLD_NOW -#define RTLD_NOW 1 // For FreeBSD 2.2.2 -#endif - -#ifndef HAVE_DLERROR -#define dlerror() "" -#endif - static bool initialized = 0; static MEM_ROOT mem; static HASH udf_hash; @@ -172,7 +146,7 @@ void udf_init() if (simple_open_n_lock_tables(new_thd, &tables)) { DBUG_PRINT("error",("Can't open udf table")); - sql_print_error("Can't open the mysql.func table. Please run the mysql_install_db script to create it."); + sql_print_error("Can't open the mysql.func table. Please run the mysql_upgrade script to create it."); goto end; } @@ -195,9 +169,8 @@ void udf_init() This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). */ - if (strchr(dl_name, '/') || - IF_WIN(strchr(dl_name, '\\'),0) || - strlen(name.str) > NAME_LEN) + if (my_strchr(files_charset_info, dl_name, dl_name + strlen(dl_name), FN_LIBCHAR) || + strlen(name.str) > NAME_LEN) { sql_print_error("Invalid row in mysql.func table for function '%.64s'", name.str); @@ -215,10 +188,13 @@ void udf_init() void *dl = find_udf_dl(tmp->dl); if (dl == NULL) { - if (!(dl = dlopen(tmp->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", tmp->dl, + NullS); + if (!(dl= dlopen(dlpath, RTLD_NOW))) { /* Print warning to log */ - sql_print_error(ER(ER_CANT_OPEN_LIBRARY), tmp->dl,errno,dlerror()); + sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dlpath, errno, dlerror()); /* Keep the udf in the hash so that we can remove it later */ continue; } @@ -420,7 +396,7 @@ int mysql_create_function(THD *thd,udf_func *udf) This is done to ensure that only approved dll from the system directories are used (to make this even remotely secure). 
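
UDF libraries are now resolved strictly inside opt_plugin_dir, and any name containing a path separator is rejected before dlopen() is ever attempted, closing the door on loading arbitrary files. A POSIX-only sketch of that hardening; the directory value and buffer size are invented:

    #include <cstdio>
    #include <cstring>
    #include <dlfcn.h>

    // Reject paths, then resolve the bare name against the plugin dir.
    static void *load_udf_library(const char *plugin_dir, const char *dl_name)
    {
      if (strchr(dl_name, '/'))            // bare library names only
      {
        fprintf(stderr, "paths are not allowed in UDF library names\n");
        return 0;
      }
      char dlpath[512];
      snprintf(dlpath, sizeof(dlpath), "%s/%s", plugin_dir, dl_name);
      void *dl= dlopen(dlpath, RTLD_NOW);
      if (!dl)
        fprintf(stderr, "dlopen(%s) failed: %s\n", dlpath, dlerror());
      return dl;
    }

    int main()
    {
      load_udf_library("/usr/lib/mysql/plugin", "../../etc/evil.so"); // rejected
      load_udf_library("/usr/lib/mysql/plugin", "udf_example.so");    // attempted
      return 0;
    }
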
*/ - if (strchr(udf->dl, '/') || IF_WIN(strchr(udf->dl, '\\'),0)) + if (my_strchr(files_charset_info, udf->dl, udf->dl + strlen(udf->dl), FN_LIBCHAR)) { my_message(ER_UDF_NO_PATHS, ER(ER_UDF_NO_PATHS), MYF(0)); DBUG_RETURN(1); @@ -439,12 +415,14 @@ int mysql_create_function(THD *thd,udf_func *udf) } if (!(dl = find_udf_dl(udf->dl))) { - if (!(dl = dlopen(udf->dl, RTLD_NOW))) + char dlpath[FN_REFLEN]; + strxnmov(dlpath, sizeof(dlpath) - 1, opt_plugin_dir, "/", udf->dl, NullS); + if (!(dl = dlopen(dlpath, RTLD_NOW))) { DBUG_PRINT("error",("dlopen of %s failed, error: %d (%s)", - udf->dl,errno,dlerror())); + dlpath, errno, dlerror())); my_error(ER_CANT_OPEN_LIBRARY, MYF(0), - udf->dl, errno, dlerror()); + dlpath, errno, dlerror()); goto err; } new_dl=1; @@ -484,7 +462,7 @@ int mysql_create_function(THD *thd,udf_func *udf) table->field[2]->store(u_d->dl,(uint) strlen(u_d->dl), system_charset_info); if (table->s->fields >= 4) // If not old func format table->field[3]->store((longlong) u_d->type, TRUE); - error = table->file->write_row(table->record[0]); + error = table->file->ha_write_row(table->record[0]); close_thread_tables(thd); if (error) @@ -536,14 +514,14 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name) if (!(table = open_ltable(thd,&tables,TL_WRITE))) goto err; table->field[0]->store(udf_name->str, udf_name->length, system_charset_info); - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (!table->file->index_read_idx(table->record[0], 0, (byte*) table->field[0]->ptr, table->key_info[0].key_length, HA_READ_KEY_EXACT)) { int error; - if ((error = table->file->delete_row(table->record[0]))) + if ((error = table->file->ha_delete_row(table->record[0]))) table->file->print_error(error, MYF(0)); } close_thread_tables(thd); diff --git a/sql/sql_union.cc b/sql/sql_union.cc index dee88af7d83..e80aaecfd64 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -62,7 +62,7 @@ bool select_union::send_data(List<Item> &values) if (thd->net.report_error) return 1; - if ((error= table->file->write_row(table->record[0]))) + if ((error= table->file->ha_write_row(table->record[0]))) { /* create_myisam_from_heap will generate error if needed */ if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE && diff --git a/sql/sql_update.cc b/sql/sql_update.cc index bfdd986f576..b19d6504b2e 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -119,10 +119,10 @@ int mysql_update(THD *thd, { bool using_limit= limit != HA_POS_ERROR; bool safe_update= thd->options & OPTION_SAFE_UPDATES; - bool used_key_is_modified, transactional_table; + bool used_key_is_modified, transactional_table, will_batch; int res; - int error; - uint used_index= MAX_KEY; + int error, loc_error; + uint used_index= MAX_KEY, dup_key_found; bool need_sort= TRUE; #ifndef NO_EMBEDDED_ACCESS_CHECKS uint want_privilege; @@ -168,7 +168,6 @@ int mysql_update(THD *thd, thd->proc_info="init"; table= table_list->table; - table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); /* Calculate "table->used_keys" based on the WHERE */ table->used_keys= table->s->keys_in_use; @@ -197,7 +196,11 @@ int mysql_update(THD *thd, table_list->grant.want_privilege= table->grant.want_privilege= want_privilege; table_list->register_want_access(want_privilege); #endif - if (setup_fields_with_no_wrap(thd, 0, fields, 1, 0, 0)) + /* + Indicate that the set of fields is to be updated by passing 2 for + set_query_id. 
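
Passing 2 for set_query_id marks assigned columns as written, so the handler layer can keep a per-statement write set; note the matching ha_set_bit_in_write_set() call for the auto-set TIMESTAMP column just below. A toy bitmap model of that bookkeeping, with an invented size and method names echoing the patch:

    #include <cstdio>
    #include <bitset>

    // Toy per-statement write set; the real one lives in the handler.
    struct WriteSetSketch
    {
      std::bitset<64> bits;
      void set_bit_in_write_set(size_t fieldnr) { bits.set(fieldnr); }
      bool column_updated(size_t fieldnr) const { return bits.test(fieldnr); }
    };

    int main()
    {
      WriteSetSketch ws;
      ws.set_bit_in_write_set(2);          // SET col3 = ...
      ws.set_bit_in_write_set(5);          // auto-set TIMESTAMP column
      printf("col3 updated: %d, col1 updated: %d\n",
             (int) ws.column_updated(2), (int) ws.column_updated(0));
      return 0;
    }
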
+ */ + if (setup_fields_with_no_wrap(thd, 0, fields, 2, 0, 0)) DBUG_RETURN(1); /* purecov: inspected */ if (table_list->view && check_fields(thd, fields)) { @@ -214,7 +217,10 @@ int mysql_update(THD *thd, if (table->timestamp_field->query_id == thd->query_id) table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; else + { table->timestamp_field->query_id=timestamp_query_id; + table->file->ha_set_bit_in_write_set(table->timestamp_field->fieldnr); + } } #ifndef NO_EMBEDDED_ACCESS_CHECKS @@ -237,6 +243,18 @@ int mysql_update(THD *thd, } // Don't count on usage of 'only index' when calculating which key to use table->used_keys.clear_all(); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (prune_partitions(thd, table, conds)) + { + free_underlaid_joins(thd, select_lex); + send_ok(thd); // No matching records + DBUG_RETURN(0); + } +#endif + /* Update the table->file->records number */ + table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + select= make_select(table, 0, 0, conds, 0, &error); if (error || !limit || (select && select->check_quick(thd, safe_update, limit))) @@ -284,13 +302,18 @@ int mysql_update(THD *thd, used_key_is_modified= check_if_key_used(table, used_index, fields); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (used_key_is_modified || order || + partition_key_modified(table, fields)) +#else if (used_key_is_modified || order) +#endif { /* We can't update table directly; We must first search after all matching rows before updating the table! */ - table->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS); + table->file->ha_retrieve_all_cols(); if (used_index < MAX_KEY && old_used_keys.is_set(used_index)) { table->key_read=1; @@ -353,6 +376,12 @@ int mysql_update(THD *thd, B.2 quick select is not used, this is full index scan (with LIMIT) Full index scan must be started with init_read_record_idx */ + /* If quick select is used, initialize it before retrieving rows. 
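
The try_semi_consistent_read()/was_semi_consistent_read() calls added to both scan loops implement a retry protocol: the engine may hand back the last committed row version without locking, and when that row must instead be re-read under a lock, the loop repeats the same position rather than advancing. A stand-alone sketch with a stub handler; the stub merely signals one retry, whereas a real handler re-positions on the same row:

    #include <cstdio>

    struct handler_stub
    {
      int reads_left= 3;
      bool retry_pending= false;
      void try_semi_consistent_read(bool on)
      { printf("semi-consistent: %d\n", (int) on); }
      bool was_semi_consistent_read()
      { bool r= retry_pending; retry_pending= false; return r; }
      int read_record() { return reads_left-- > 0 ? 0 : -1; } // -1 == EOF
    };

    int main()
    {
      handler_stub h;
      h.try_semi_consistent_read(true);
      h.retry_pending= true;     // pretend row 1 needs a locked re-read
      while (h.read_record() == 0)
      {
        if (h.was_semi_consistent_read())
          continue;              // repeat the read of the same row
        printf("process row\n");
      }
      h.try_semi_consistent_read(false);
      return 0;
    }
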
*/ + if (select && select->quick && select->quick->reset()) + goto err; + + table->file->try_semi_consistent_read(1); + if (used_index == MAX_KEY || (select && select->quick)) init_read_record(&info,thd,table,select,0,1); else @@ -365,6 +394,9 @@ int mysql_update(THD *thd, { if (!(select && select->skip_record())) { + if (table->file->was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + table->file->position(table->record[0]); if (my_b_write(&tempfile,table->file->ref, table->file->ref_length)) @@ -384,6 +416,7 @@ int mysql_update(THD *thd, if (thd->killed && !error) error= 1; // Aborted limit= tmp_limit; + table->file->try_semi_consistent_read(0); end_read_record(&info); /* Change select to use tempfile */ @@ -418,6 +451,7 @@ int mysql_update(THD *thd, if (select && select->quick && select->quick->reset()) goto err; + table->file->try_semi_consistent_read(1); init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -432,11 +466,15 @@ int mysql_update(THD *thd, (thd->variables.sql_mode & (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))); + will_batch= !table->file->start_bulk_update(); while (!(error=info.read_record(&info)) && !thd->killed) { if (!(select && select->skip_record())) { + if (table->file->was_semi_consistent_read()) + continue; /* repeat the read of the same row if it still exists */ + store_record(table,record[1]); if (fill_record_n_invoke_before_triggers(thd, fields, values, 0, table->triggers, @@ -459,8 +497,47 @@ int mysql_update(THD *thd, break; } } - if (!(error=table->file->update_row((byte*) table->record[1], - (byte*) table->record[0]))) + if (will_batch) + { + /* + Typically a batched handler can execute the batched jobs when: + 1) When specifically told to do so + 2) When it is not a good idea to batch anymore + 3) When it is necessary to send batch for other reasons + (One such reason is when READ's must be performed) + + 1) is covered by exec_bulk_update calls. + 2) and 3) is handled by the bulk_update_row method. + + bulk_update_row can execute the updates including the one + defined in the bulk_update_row or not including the row + in the call. This is up to the handler implementation and can + vary from call to call. + + The dup_key_found reports the number of duplicate keys found + in those updates actually executed. It only reports those if + the extra call with HA_EXTRA_IGNORE_DUP_KEY have been issued. + If this hasn't been issued it returns an error code and can + ignore this number. Thus any handler that implements batching + for UPDATE IGNORE must also handle this extra call properly. + + If a duplicate key is found on the record included in this + call then it should be included in the count of dup_key_found + and error should be set to 0 (only if these errors are ignored). 
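
The comment above defines the batching contract; the sketch below models one plausible handler side of it, with made-up row types. bulk_update_row() queues work and may flush at any point, exec_bulk_update() forces a flush and reports ignorable duplicate-key hits through dup_key_found, and end_bulk_update() releases resources:

    #include <cstdio>
    #include <vector>

    struct Row { int key; };

    struct batching_handler
    {
      std::vector<Row> pending;
      bool start_bulk_update() { return false; }  // false == batching supported
      int bulk_update_row(const Row &r, unsigned *dup_key_found)
      {
        pending.push_back(r);
        *dup_key_found= 0;                        // nothing flushed yet
        return 0;
      }
      int exec_bulk_update(unsigned *dup_key_found)
      {
        *dup_key_found= 0;
        for (const Row &r : pending)
          if (r.key == 42)                        // pretend key 42 collides
            ++*dup_key_found;                     // counted, not an error
        pending.clear();
        return 0;
      }
      void end_bulk_update() {}
    };

    int main()
    {
      batching_handler h;
      unsigned dups= 0;
      if (!h.start_bulk_update())
      {
        h.bulk_update_row(Row{1}, &dups);
        h.bulk_update_row(Row{42}, &dups);
        h.exec_bulk_update(&dups);
        printf("duplicates ignored in batch: %u\n", dups);
        h.end_bulk_update();
      }
      return 0;
    }

This is why the surrounding loop adjusts limit and updated by dup_key_found: rows that hit an ignorable duplicate key were neither updated nor consumed against LIMIT, so the counters must be corrected after each flush.
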
+ */ + error= table->file->bulk_update_row(table->record[1], + table->record[0], + &dup_key_found); + limit+= dup_key_found; + updated-= dup_key_found; + } + else + { + /* Non-batched update */ + error= table->file->ha_update_row((byte*) table->record[1], + (byte*) table->record[0]); + } + if (!error) { updated++; thd->no_trans_update= !transactional_table; @@ -489,20 +566,75 @@ int mysql_update(THD *thd, if (!--limit && using_limit) { - error= -1; // Simulate end of file - break; + /* + We have reached end-of-file in most common situations where no + batching has occurred and if batching was supposed to occur but + no updates were made and finally when the batch execution was + performed without error and without finding any duplicate keys. + If the batched updates were performed with errors we need to + check and if no error but duplicate key's found we need to + continue since those are not counted for in limit. + */ + if (will_batch && + ((error= table->file->exec_bulk_update(&dup_key_found)) || + !dup_key_found)) + { + if (error) + { + /* + The handler should not report error of duplicate keys if they + are ignored. This is a requirement on batching handlers. + */ + table->file->print_error(error,MYF(0)); + error= 1; + break; + } + /* + Either an error was found and we are ignoring errors or there + were duplicate keys found. In both cases we need to correct + the counters and continue the loop. + */ + limit= dup_key_found; //limit is 0 when we get here so need to + + updated-= dup_key_found; + } + else + { + error= -1; // Simulate end of file + break; + } } } else table->file->unlock_row(); thd->row_count++; } + dup_key_found= 0; if (thd->killed && !error) error= 1; // Aborted + else if (will_batch && + (loc_error= table->file->exec_bulk_update(&dup_key_found))) + /* + An error has occurred when a batched update was performed and returned + an error indication. It cannot be an allowed duplicate key error since + we require the batching handler to treat this as a normal behavior. + + Otherwise we simply remove the number of duplicate keys records found + in the batched update. 
+ */ + { + thd->fatal_error(); + table->file->print_error(loc_error,MYF(0)); + error= 1; + } + else + updated-= dup_key_found; + if (will_batch) + table->file->end_bulk_update(); + table->file->try_semi_consistent_read(0); end_read_record(&info); free_io_cache(table); // If ORDER BY delete select; - thd->proc_info="end"; + thd->proc_info= "end"; VOID(table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY)); /* @@ -529,10 +661,13 @@ int mysql_update(THD *thd, { if (error < 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_table, FALSE); - if (mysql_bin_log.write(&qinfo) && transactional_table) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_table, FALSE) && + transactional_table) + { error=1; // Rollback update + } } if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; @@ -712,7 +847,7 @@ reopen_tables: &lex->select_lex.leaf_tables, FALSE)) DBUG_RETURN(TRUE); - if (setup_fields_with_no_wrap(thd, 0, *fields, 1, 0, 0)) + if (setup_fields_with_no_wrap(thd, 0, *fields, 2, 0, 0)) DBUG_RETURN(TRUE); for (tl= table_list; tl ; tl= tl->next_local) @@ -1092,7 +1227,8 @@ multi_update::initialize_tables(JOIN *join) /* ok to be on stack as this is not referenced outside of this func */ Field_string offset(table->file->ref_length, 0, "offset", - table, &my_charset_bin); + &my_charset_bin); + offset.init(table); if (!(ifield= new Item_field(((Field *) &offset)))) DBUG_RETURN(1); ifield->maybe_null= 0; @@ -1267,8 +1403,8 @@ bool multi_update::send_data(List<Item> ¬_used_values) */ main_table->file->extra(HA_EXTRA_PREPARE_FOR_UPDATE); } - if ((error=table->file->update_row(table->record[1], - table->record[0]))) + if ((error=table->file->ha_update_row(table->record[1], + table->record[0]))) { updated--; if (!ignore || error != HA_ERR_FOUND_DUPP_KEY) @@ -1303,7 +1439,7 @@ bool multi_update::send_data(List<Item> ¬_used_values) memcpy((char*) tmp_table->field[0]->ptr, (char*) table->file->ref, table->file->ref_length); /* Write row, ignoring duplicated updates to a row */ - if ((error= tmp_table->file->write_row(tmp_table->record[0]))) + if ((error= tmp_table->file->ha_write_row(tmp_table->record[0]))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE && @@ -1420,8 +1556,8 @@ int multi_update::do_updates(bool from_send_error) if (compare_record(table, thd->query_id)) { - if ((local_error=table->file->update_row(table->record[1], - table->record[0]))) + if ((local_error=table->file->ha_update_row(table->record[1], + table->record[0]))) { if (!ignore || local_error != HA_ERR_FOUND_DUPP_KEY) goto err; @@ -1500,10 +1636,13 @@ bool multi_update::send_eof() { if (local_error == 0) thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - transactional_tables, FALSE); - if (mysql_bin_log.write(&qinfo) && trans_safe) + if (thd->binlog_query(THD::ROW_QUERY_TYPE, + thd->query, thd->query_length, + transactional_tables, FALSE) && + trans_safe) + { local_error= 1; // Rollback update + } } if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 4f62a80cfd4..c4cb9770e14 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -568,8 +568,8 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view, String str(buff,(uint32) sizeof(buff), system_charset_info); char md5[MD5_BUFF_LENGTH]; bool can_be_merged; - char dir_buff[FN_REFLEN], file_buff[FN_REFLEN]; - LEX_STRING dir, file; + char dir_buff[FN_REFLEN], 
file_buff[FN_REFLEN], path_buff[FN_REFLEN]; + LEX_STRING dir, file, path; DBUG_ENTER("mysql_register_view"); /* print query */ @@ -584,15 +584,17 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view, DBUG_PRINT("info", ("View: %s", str.ptr())); /* print file name */ - (void) my_snprintf(dir_buff, FN_REFLEN, "%s/%s/", - mysql_data_home, view->db); - unpack_filename(dir_buff, dir_buff); + dir.length= build_table_filename(dir_buff, sizeof(dir_buff), + view->db, "", ""); dir.str= dir_buff; - dir.length= strlen(dir_buff); - file.str= file_buff; - file.length= (strxnmov(file_buff, FN_REFLEN, view->table_name, reg_ext, - NullS) - file_buff); + path.length= build_table_filename(path_buff, sizeof(path_buff), + view->db, view->table_name, reg_ext); + path.str= path_buff; + + file.str= path.str + dir.length; + file.length= path.length - dir.length; + /* init timestamp */ if (!view->timestamp.str) view->timestamp.str= view->timestamp_buffer; @@ -1175,17 +1177,17 @@ err: bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) { - DBUG_ENTER("mysql_drop_view"); char path[FN_REFLEN]; TABLE_LIST *view; - bool type= 0; - db_type not_used; + enum legacy_db_type not_used; + DBUG_ENTER("mysql_drop_view"); for (view= views; view; view= view->next_local) { - strxnmov(path, FN_REFLEN, mysql_data_home, "/", view->db, "/", - view->table_name, reg_ext, NullS); - (void) unpack_filename(path, path); + TABLE_SHARE *share; + bool type= 0; + build_table_filename(path, sizeof(path), + view->db, view->table_name, reg_ext); VOID(pthread_mutex_lock(&LOCK_open)); if (access(path, F_OK) || (type= (mysql_frm_type(thd, path, ¬_used) != FRMTYPE_VIEW))) @@ -1208,6 +1210,20 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) } if (my_delete(path, MYF(MY_WME))) goto err; + + /* + For a view, there is only one table_share object which should never + be used outside of LOCK_open + */ + if ((share= get_cached_table_share(view->db, view->table_name))) + { + DBUG_ASSERT(share->ref_count == 0); + pthread_mutex_lock(&share->mutex); + share->ref_count++; + share->version= 0; + pthread_mutex_unlock(&share->mutex); + release_table_share(share, RELEASE_WAIT_FOR_DROP); + } query_cache_invalidate3(thd, view, 0); sp_cache_invalidate(); VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1235,7 +1251,7 @@ err: FRMTYPE_VIEW view */ -frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt) +frm_type_enum mysql_frm_type(THD *thd, char *path, enum legacy_db_type *dbt) { File file; uchar header[10]; //"TYPE=VIEW\n" it is 10 characters @@ -1264,7 +1280,7 @@ frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt) (header[2] < FRM_VER+3 || header[2] > FRM_VER+4))) DBUG_RETURN(FRMTYPE_TABLE); - *dbt= ha_checktype(thd, (enum db_type) (uint) *(header + 3), 0, 0); + *dbt= (enum legacy_db_type) (uint) *(header + 3); DBUG_RETURN(FRMTYPE_TABLE); // Is probably a .frm table } @@ -1322,6 +1338,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST *view) */ bool save_set_query_id= thd->set_query_id; thd->set_query_id= 0; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); for (Field_translator *fld= trans; fld < end_of_trans; fld++) { if (!fld->item->fixed && fld->item->fix_fields(thd, &fld->item)) @@ -1331,6 +1348,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST *view) } } thd->set_query_id= save_set_query_id; + DBUG_PRINT("info", ("thd->set_query_id: %d", thd->set_query_id)); } /* Loop over all keys to see if a unique-not-null key is used */ for (;key_info != key_info_end ; key_info++) @@ 
-1486,7 +1504,7 @@ mysql_rename_view(THD *thd, DBUG_ENTER("mysql_rename_view"); - strxnmov(view_path, FN_REFLEN, mysql_data_home, "/", view->db, "/", + strxnmov(view_path, FN_REFLEN-1, mysql_data_home, "/", view->db, "/", view->table_name, reg_ext, NullS); (void) unpack_filename(view_path, view_path); @@ -1520,7 +1538,8 @@ mysql_rename_view(THD *thd, view_def.revision - 1, num_view_backups)) goto err; - strxnmov(dir_buff, FN_REFLEN, mysql_data_home, "/", view->db, "/", NullS); + strxnmov(dir_buff, FN_REFLEN-1, mysql_data_home, "/", view->db, "/", + NullS); (void) unpack_filename(dir_buff, dir_buff); pathstr.str= (char*)dir_buff; diff --git a/sql/sql_view.h b/sql/sql_view.h index cd61d7e9e71..1e3e5f4aa73 100644 --- a/sql/sql_view.h +++ b/sql/sql_view.h @@ -27,7 +27,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST * view); bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view); -frm_type_enum mysql_frm_type(THD *thd, char *path, db_type *dbt); +frm_type_enum mysql_frm_type(THD *thd, char *path, enum legacy_db_type *dbt); int view_checksum(THD *thd, TABLE_LIST *view); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index d6d2939bed3..a744d941a4b 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -38,6 +38,7 @@ #include "sp_pcontext.h" #include "sp_rcontext.h" #include "sp.h" +#include "event.h" #include <myisam.h> #include <myisammrg.h> @@ -47,12 +48,7 @@ const LEX_STRING null_lex_str={0,0}; #define yyoverflow(A,B,C,D,E,F) {ulong val= *(F); if (my_yyoverflow((B), (D), &val)) { yyerror((char*) (A)); return 2; } else { *(F)= (YYSIZE_T)val; }} -#define WARN_DEPRECATED(A,B) \ - push_warning_printf(((THD *)yythd), MYSQL_ERROR::WARN_LEVEL_WARN, \ - ER_WARN_DEPRECATED_SYNTAX, \ - ER(ER_WARN_DEPRECATED_SYNTAX), (A), (B)); - -#define YYERROR_UNLESS(A) \ +#define YYERROR_UNLESS(A) \ if (!(A)) \ { \ yyerror(ER(ER_SYNTAX_ERROR)); \ @@ -73,6 +69,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) int num; ulong ulong_num; ulonglong ulonglong_number; + longlong longlong_number; LEX_STRING lex_str; LEX_STRING *lex_str_ptr; LEX_SYMBOL symbol; @@ -91,7 +88,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) enum enum_var_type var_type; Key::Keytype key_type; enum ha_key_alg key_alg; - enum db_type db_type; + handlerton *db_type; enum row_type row_type; enum ha_rkey_function ha_rkey_mode; enum enum_tx_isolation tx_isolation; @@ -107,6 +104,7 @@ inline Item *is_truth_value(Item *A, bool v1, bool v2) struct { int vars, conds, hndlrs, curs; } spblock; sp_name *spname; struct st_lex *lex; + sp_head *sphead; } %{ @@ -118,6 +116,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token END_OF_INPUT %token ABORT_SYM +%token ACCESSIBLE_SYM %token ACTION %token ADD %token ADDDATE_SYM @@ -135,8 +134,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ASC %token ASCII_SYM %token ASENSITIVE_SYM +%token AT_SYM %token ATAN +%token AUTHORS_SYM %token AUTO_INC +%token AUTOEXTEND_SIZE_SYM %token AVG_ROW_LENGTH %token AVG_SYM %token BACKUP_SYM @@ -184,6 +186,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token COMMITTED_SYM %token COMMIT_SYM %token COMPACT_SYM +%token COMPLETION_SYM %token COMPRESSED_SYM %token CONCAT %token CONCAT_WS @@ -206,6 +209,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CURTIME %token DATABASE %token DATABASES +%token DATAFILE_SYM %token DATA_SYM %token DATETIME %token DATE_ADD_INTERVAL @@ -235,6 +239,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token 
DIRECTORY_SYM %token DISABLE_SYM %token DISCARD +%token DISK_SYM %token DISTINCT %token DIV_SYM %token DOUBLE_SYM @@ -252,6 +257,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ENCODE_SYM %token ENCRYPT %token END +%token ENDS_SYM %token ENGINES_SYM %token ENGINE_SYM %token ENUM @@ -260,13 +266,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ERRORS %token ESCAPED %token ESCAPE_SYM +%token EVENT_SYM %token EVENTS_SYM +%token EVERY_SYM %token EXECUTE_SYM %token EXISTS %token EXIT_SYM %token EXPANSION_SYM %token EXPORT_SET %token EXTENDED_SYM +%token EXTENT_SIZE_SYM %token EXTRACT_SYM %token FALSE_SYM %token FAST_SYM @@ -329,12 +338,14 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token INDEXES %token INDEX_SYM %token INFILE +%token INITIAL_SIZE_SYM %token INNER_SYM %token INNOBASE_SYM %token INOUT_SYM %token INSENSITIVE_SYM %token INSERT %token INSERT_METHOD +%token INSTALL_SYM %token INTERVAL_SYM %token INTO %token INT_SYM @@ -358,19 +369,23 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token LEAVES %token LEAVE_SYM %token LEFT +%token LESS_SYM %token LEVEL_SYM %token LEX_HOSTNAME %token LIKE %token LIMIT +%token LINEAR_SYM %token LINEFROMTEXT %token LINES %token LINESTRING +%token LIST_SYM %token LOAD %token LOCAL_SYM %token LOCATE %token LOCATOR_SYM %token LOCKS_SYM %token LOCK_SYM +%token LOGFILE_SYM %token LOGS_SYM %token LOG_SYM %token LONGBLOB @@ -401,13 +416,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_CONNECTIONS_PER_HOUR %token MAX_QUERIES_PER_HOUR %token MAX_ROWS +%token MAX_SIZE_SYM %token MAX_SYM %token MAX_UPDATES_PER_HOUR %token MAX_USER_CONNECTIONS_SYM +%token MAX_VALUE_SYM %token MEDIUMBLOB %token MEDIUMINT %token MEDIUMTEXT %token MEDIUM_SYM +%token MEMORY_SYM %token MERGE_SYM %token MICROSECOND_SYM %token MIGRATE_SYM @@ -438,11 +456,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NE %token NEW_SYM %token NEXT_SYM +%token NODEGROUP_SYM %token NONE_SYM %token NOT2_SYM %token NOT_SYM %token NOW_SYM %token NO_SYM +%token NO_WAIT_SYM %token NO_WRITE_TO_BINLOG %token NULL_SYM %token NUM @@ -465,10 +485,14 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUTFILE %token OUT_SYM %token PACK_KEYS_SYM +%token PARSER_SYM %token PARTIAL +%token PARTITION_SYM +%token PARTITIONS_SYM %token PASSWORD %token PARAM_MARKER %token PHASE_SYM +%token PLUGIN_SYM %token POINTFROMTEXT %token POINT_SYM %token POLYFROMTEXT @@ -476,6 +500,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token POSITION_SYM %token PRECISION %token PREPARE_SYM +%token PRESERVE_SYM %token PREV_SYM %token PRIMARY_SYM %token PRIVILEGES @@ -486,16 +511,17 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token QUARTER_SYM %token QUERY_SYM %token QUICK -%token RAID_0_SYM -%token RAID_CHUNKS -%token RAID_CHUNKSIZE -%token RAID_STRIPED_SYM -%token RAID_TYPE %token RAND +%token RANGE_SYM %token READS_SYM +%token READ_ONLY_SYM %token READ_SYM +%token READ_WRITE_SYM %token REAL +%token REBUILD_SYM %token RECOVER_SYM +%token REDO_BUFFER_SIZE_SYM +%token REDOFILE_SYM %token REDUNDANT_SYM %token REFERENCES %token REGEXP @@ -505,6 +531,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token RELEASE_SYM %token RELOAD %token RENAME +%token REORGANIZE_SYM %token REPAIR %token REPEATABLE_SYM %token REPEAT_SYM @@ -530,6 +557,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); 
%token ROW_SYM %token RTREE_SYM %token SAVEPOINT_SYM +%token SCHEDULE_SYM %token SECOND_MICROSECOND_SYM %token SECOND_SYM %token SECURITY_SYM @@ -551,6 +579,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SLAVE %token SMALLINT %token SNAPSHOT_SYM +%token SONAME_SYM %token SOUNDS_SYM %token SPATIAL_SYM %token SPECIFIC_SYM @@ -568,6 +597,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SSL_SYM %token STARTING %token START_SYM +%token STARTS_SYM %token STATUS_SYM %token STD_SYM %token STDDEV_SAMP_SYM @@ -577,6 +607,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token STRING_SYM %token SUBDATE_SYM %token SUBJECT_SYM +%token SUBPARTITION_SYM +%token SUBPARTITIONS_SYM %token SUBSTRING %token SUBSTRING_INDEX %token SUM_SYM @@ -598,6 +630,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TINYBLOB %token TINYINT %token TINYTEXT +%token THAN_SYM %token TO_SYM %token TRAILING %token TRANSACTION_SYM @@ -609,24 +642,23 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TYPES_SYM %token TYPE_SYM %token UDF_RETURNS_SYM -%token UDF_SONAME_SYM %token ULONGLONG_NUM %token UNCOMMITTED_SYM %token UNDEFINED_SYM +%token UNDO_BUFFER_SIZE_SYM +%token UNDOFILE_SYM %token UNDERSCORE_CHARSET %token UNDO_SYM %token UNICODE_SYM +%token UNINSTALL_SYM %token UNION_SYM %token UNIQUE_SYM %token UNIQUE_USERS %token UNIX_TIMESTAMP %token UNKNOWN_SYM %token UNLOCK_SYM -%token UNLOCK_SYM %token UNSIGNED %token UNTIL_SYM -%token UNTIL_SYM -%token UPDATE_SYM %token UPDATE_SYM %token UPGRADE_SYM %token USAGE @@ -646,6 +678,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING %token VIEW_SYM +%token WAIT_SYM %token WARNINGS %token WEEK_SYM %token WHEN_SYM @@ -662,6 +695,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token YEAR_SYM %token ZEROFILL + %left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT /* A dummy token to force the priority of table_ref production in a join. 
*/ %left TABLE_REF_PRIORITY @@ -688,7 +722,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem %type <lex_str_ptr> - opt_table_alias + opt_table_alias opt_fulltext_parser %type <table> table_ident table_ident_nodb references xid @@ -708,12 +742,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); opt_ignore_leaves fulltext_options spatial_type union_option start_transaction_opts opt_chain opt_release union_opt select_derived_init option_type2 + opt_natural_language_mode opt_query_expansion + opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment + ev_alter_on_schedule_completion opt_ev_rename_to opt_ev_sql_stmt %type <ulong_num> - ulong_num raid_types merge_insert_types + ulong_num merge_insert_types %type <ulonglong_number> - ulonglong_num + ulonglong_num size_number + +%type <longlong_number> + part_bit_expr %type <lock_type> replace_lock_option opt_low_priority insert_lock_option load_data_lock @@ -731,6 +771,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); sp_opt_default simple_ident_nospvar simple_ident_q field_or_var limit_option + part_func_expr %type <item_num> NUM_literal @@ -821,7 +862,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); clear_privileges flush_options flush_option equal optional_braces opt_key_definition key_usage_list2 opt_mi_check_type opt_to mi_check_types normal_join - table_to_table_list table_to_table opt_table_list opt_as + db_to_db table_to_table_list table_to_table opt_table_list opt_as handler_rkey_function handler_read_or_scan single_multi table_wild_list table_wild_one opt_wild union_clause union_list @@ -835,9 +876,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); definer view_replace_or_algorithm view_replace view_algorithm_opt view_algorithm view_or_trigger_tail view_suid view_tail view_list_opt view_list view_select view_check_option trigger_tail + install uninstall partition_entry binlog_base64_event END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt +%type <NONE> sp_proc_stmt_statement sp_proc_stmt_return +%type <NONE> sp_proc_stmt_if sp_proc_stmt_case_simple sp_proc_stmt_case +%type <NONE> sp_labeled_control sp_proc_stmt_unlabeled sp_proc_stmt_leave +%type <NONE> sp_proc_stmt_iterate sp_proc_stmt_label sp_proc_stmt_goto +%type <NONE> sp_proc_stmt_open sp_proc_stmt_fetch sp_proc_stmt_close + %type <num> sp_decl_idents sp_opt_inout sp_handler_type sp_hcond_list %type <spcondtype> sp_cond sp_hcond %type <spblock> sp_decls sp_decl @@ -878,6 +926,7 @@ statement: alter | analyze | backup + | binlog_base64_event | call | change | check @@ -895,11 +944,13 @@ statement: | handler | help | insert + | install | kill | load | lock | optimize | keycache + | partition_entry | preload | prepare | purge @@ -918,6 +969,7 @@ statement: | slave | start | truncate + | uninstall | unlock | update | use @@ -1152,7 +1204,7 @@ create: lex->change=NullS; bzero((char*) &lex->create_info,sizeof(lex->create_info)); lex->create_info.options=$2 | $4; - lex->create_info.db_type= (enum db_type) lex->thd->variables.table_type; + lex->create_info.db_type= lex->thd->variables.table_type; lex->create_info.default_table_charset= NULL; lex->name=0; } @@ -1170,11 +1222,15 @@ create: lex->col_list.empty(); lex->change=NullS; } - '(' key_list ')' + '(' key_list ')' opt_fulltext_parser { LEX *lex=Lex; - - lex->key_list.push_back(new Key($2,$4.str, $5, 0, lex->col_list)); + if ($2 != Key::FULLTEXT && $12) + { 
+ yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + } + lex->key_list.push_back(new Key($2,$4.str,$5,0,lex->col_list,$12)); lex->col_list.empty(); } | CREATE DATABASE opt_if_not_exists ident @@ -1219,7 +1275,7 @@ create: * stored procedure, otherwise yylex will chop it into pieces * at each ';'. */ - sp->m_old_cmq= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES; + $<ulong_num>$= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES; YYTHD->client_capabilities &= (~CLIENT_MULTI_QUERIES); } '(' @@ -1252,11 +1308,75 @@ create: YYABORT; sp->init_strings(YYTHD, lex, $3); lex->sql_command= SQLCOM_CREATE_PROCEDURE; - /* Restore flag if it was cleared above */ - if (sp->m_old_cmq) - YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES; + + /* + Restore flag if it was cleared above + Be careful with counting. the block where we save the value + is $4. + */ + YYTHD->client_capabilities |= $<ulong_num>4; sp->restore_thd_mem_root(YYTHD); } + | CREATE EVENT_SYM opt_if_not_exists sp_name + /* + BE CAREFUL when you add a new rule to update the block where + YYTHD->client_capabilities is set back to original value + */ + { + LEX *lex=Lex; + + if (lex->et) + { + /* + Recursive events are not possible because recursive SPs + are not also possible. lex->sp_head is not stacked. + */ + // ToDo Andrey : Change the error message + my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "EVENT"); + YYABORT; + } + + lex->create_info.options= $3; + + if (!(lex->et= new Event_timed())) // implicitly calls Event_timed::init() + YYABORT; + + /* + We have to turn of CLIENT_MULTI_QUERIES while parsing a + stored procedure, otherwise yylex will chop it into pieces + at each ';'. + */ + $<ulong_num>$= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES; + YYTHD->client_capabilities &= (~CLIENT_MULTI_QUERIES); + + if (!lex->et_compile_phase) + { + lex->et->init_name(YYTHD, $4); + lex->et->init_definer(YYTHD); + } + } + ON SCHEDULE_SYM ev_schedule_time + opt_ev_on_completion + opt_ev_status + opt_ev_comment + DO_SYM ev_sql_stmt + { + /* + Restore flag if it was cleared above + $1 - CREATE + $2 - EVENT_SYM + $3 - opt_if_not_exists + $4 - sp_name + $5 - the block above + */ + YYTHD->client_capabilities |= $<ulong_num>5; + + /* + sql_command is set here because some rules in ev_sql_stmt + can overwrite it + */ + Lex->sql_command= SQLCOM_CREATE_EVENT; + } | CREATE { Lex->create_view_mode= VIEW_CREATE_NEW; @@ -1269,8 +1389,240 @@ create: { Lex->sql_command = SQLCOM_CREATE_USER; } + | CREATE LOGFILE_SYM GROUP logfile_group_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= CREATE_LOGFILE_GROUP; + } + | CREATE TABLESPACE tablespace_info + { + LEX *lex= Lex; + lex->alter_tablespace_info->ts_cmd_type= CREATE_TABLESPACE; + } ; + +ev_schedule_time: EVERY_SYM expr interval + { + LEX *lex=Lex; + if (!lex->et_compile_phase) + { + switch (lex->et->init_interval(YYTHD , $2, $3)) { + case EVEX_PARSE_ERROR: + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + break; + case EVEX_BAD_PARAMS: + my_error(ER_EVENT_INTERVAL_NOT_POSITIVE_OR_TOO_BIG, MYF(0)); + case EVEX_MICROSECOND_UNSUP: + my_error(ER_NOT_SUPPORTED_YET, MYF(0), "MICROSECOND"); + YYABORT; + break; + } + } + } + ev_starts + ev_ends + | AT_SYM expr + { + LEX *lex=Lex; + if (!lex->et_compile_phase) + { + switch (lex->et->init_execute_at(YYTHD, $2)) { + case EVEX_PARSE_ERROR: + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + break; + case ER_WRONG_VALUE: + { + char buff[120]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + String *str2= $2->val_str(&str); + my_error(ER_WRONG_VALUE, 
MYF(0), "AT", + str2? str2->c_ptr():"NULL"); + YYABORT; + break; + } + case EVEX_BAD_PARAMS: + my_error(ER_EVENT_EXEC_TIME_IN_THE_PAST, MYF(0)); + YYABORT; + break; + } + } + } + ; + +opt_ev_status: /* empty */ { $$= 0; } + | ENABLE_SYM + { + LEX *lex=Lex; + if (!lex->et_compile_phase) + lex->et->status= MYSQL_EVENT_ENABLED; + $$= 1; + } + | DISABLE_SYM + { + LEX *lex=Lex; + + if (!lex->et_compile_phase) + lex->et->status= MYSQL_EVENT_DISABLED; + $$= 1; + } + ; + +ev_starts: /* empty */ + { + Lex->et->init_starts(YYTHD, new Item_func_now_local()); + } + | STARTS_SYM expr + { + LEX *lex= Lex; + if (!lex->et_compile_phase) + { + + switch (lex->et->init_starts(YYTHD, $2)) { + case EVEX_PARSE_ERROR: + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + break; + case EVEX_BAD_PARAMS: + { + char buff[20]; + String str(buff,(uint32) sizeof(buff), system_charset_info); + String *str2= $2->val_str(&str); + my_error(ER_WRONG_VALUE, MYF(0), "STARTS", str2? str2->c_ptr(): + NULL); + YYABORT; + break; + } + } + } + } + ; + +ev_ends: /* empty */ + | ENDS_SYM expr + { + LEX *lex= Lex; + if (!lex->et_compile_phase) + { + switch (lex->et->init_ends(YYTHD, $2)) { + case EVEX_PARSE_ERROR: + yyerror(ER(ER_SYNTAX_ERROR)); + YYABORT; + break; + case EVEX_BAD_PARAMS: + my_error(ER_EVENT_ENDS_BEFORE_STARTS, MYF(0)); + YYABORT; + break; + } + } + } + ; + +opt_ev_on_completion: /* empty */ { $$= 0; } + | ev_on_completion + ; + +ev_on_completion: + ON COMPLETION_SYM PRESERVE_SYM + { + LEX *lex=Lex; + if (!lex->et_compile_phase) + lex->et->on_completion= MYSQL_EVENT_ON_COMPLETION_PRESERVE; + $$= 1; + } + | ON COMPLETION_SYM NOT_SYM PRESERVE_SYM + { + LEX *lex=Lex; + if (!lex->et_compile_phase) + lex->et->on_completion= MYSQL_EVENT_ON_COMPLETION_DROP; + $$= 1; + } + ; + +opt_ev_comment: /* empty */ { $$= 0; } + | COMMENT_SYM TEXT_STRING_sys + { + LEX *lex= Lex; + if (!lex->et_compile_phase) + { + lex->comment= $2; + lex->et->init_comment(YYTHD, &$2); + } + $$= 1; + } + ; + +ev_sql_stmt: + { + LEX *lex= Lex; + sp_head *sp; + + $<sphead>$= lex->sphead; + + if (!lex->sphead) + { + if (!(sp= new sp_head())) + YYABORT; + + sp->reset_thd_mem_root(YYTHD); + sp->init(lex); + + sp->m_type= TYPE_ENUM_PROCEDURE; + + lex->sphead= sp; + + bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + lex->sphead->m_chistics= &lex->sp_chistics; + + lex->sphead->m_body_begin= lex->ptr; + } + + if (!lex->et_compile_phase) + lex->et->body_begin= lex->ptr; + } + ev_sql_stmt_inner + { + LEX *lex=Lex; + + if (!$<sphead>1) + { + sp_head *sp= lex->sphead; + // return back to the original memory root ASAP + sp->init_strings(YYTHD, lex, NULL); + sp->restore_thd_mem_root(YYTHD); + + lex->sp_chistics.suid= SP_IS_SUID;//always the definer! 
+ev_sql_stmt:
+         {
+           LEX *lex= Lex;
+           sp_head *sp;
+
+           $<sphead>$= lex->sphead;
+
+           if (!lex->sphead)
+           {
+             if (!(sp= new sp_head()))
+               YYABORT;
+
+             sp->reset_thd_mem_root(YYTHD);
+             sp->init(lex);
+
+             sp->m_type= TYPE_ENUM_PROCEDURE;
+
+             lex->sphead= sp;
+
+             bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics));
+             lex->sphead->m_chistics= &lex->sp_chistics;
+
+             lex->sphead->m_body_begin= lex->ptr;
+           }
+
+           if (!lex->et_compile_phase)
+             lex->et->body_begin= lex->ptr;
+         }
+         ev_sql_stmt_inner
+         {
+           LEX *lex=Lex;
+
+           if (!$<sphead>1)
+           {
+             sp_head *sp= lex->sphead;
+             // return back to the original memory root ASAP
+             sp->init_strings(YYTHD, lex, NULL);
+             sp->restore_thd_mem_root(YYTHD);
+
+             lex->sp_chistics.suid= SP_IS_SUID; // always the definer!
+
+             lex->et->sphead= lex->sphead;
+             lex->sphead= NULL;
+           }
+           if (!lex->et_compile_phase)
+           {
+             lex->et->init_body(YYTHD);
+           }
+         }
+       ;
+
+ev_sql_stmt_inner:
+         sp_proc_stmt_statement
+       | sp_proc_stmt_return
+       | sp_proc_stmt_if
+       | sp_proc_stmt_case_simple
+       | sp_proc_stmt_case
+       | sp_labeled_control
+       | sp_proc_stmt_unlabeled
+       | sp_proc_stmt_leave
+       | sp_proc_stmt_iterate
+       | sp_proc_stmt_label
+       | sp_proc_stmt_goto
+       | sp_proc_stmt_open
+       | sp_proc_stmt_fetch
+       | sp_proc_stmt_close
+       ;
+
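
The $<ulong_num>$ bookkeeping used throughout these rules replaces the old sp_head::m_old_cmq member: the saved CLIENT_MULTI_QUERIES bit now travels on the parser value stack, which also works for events, where no sp_head exists yet at the point of saving. A minimal sketch of the pattern, with thd and parse_body() as placeholder names:

    ulong saved_cmq= thd->client_capabilities & CLIENT_MULTI_QUERIES;
    thd->client_capabilities&= ~CLIENT_MULTI_QUERIES; /* keep yylex from splitting at ';' */
    parse_body();                                     /* the body may itself contain ';' */
    thd->client_capabilities|= saved_cmq;             /* restore whatever bit was set before */
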
 clear_privileges:
        /* Nothing */
        {
@@ -1314,7 +1666,7 @@ sp_name:
        ;

 create_function_tail:
-       RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys
+       RETURNS_SYM udf_type SONAME_SYM TEXT_STRING_sys
        {
          LEX *lex=Lex;
          lex->sql_command = SQLCOM_CREATE_FUNCTION;
@@ -1354,7 +1706,7 @@ create_function_tail:
            * stored procedure, otherwise yylex will chop it into pieces
            * at each ';'.
            */
-           sp->m_old_cmq= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES;
+           $<ulong_num>$= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES;
            YYTHD->client_capabilities &= ~CLIENT_MULTI_QUERIES;
            lex->sphead->m_param_begin= lex->tok_start+1;
          }
@@ -1409,8 +1761,7 @@ create_function_tail:
              YYABORT;
            }
            /* Restore flag if it was cleared above */
-           if (sp->m_old_cmq)
-             YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES;
+           YYTHD->client_capabilities |= $<ulong_num>2;
            sp->restore_thd_mem_root(YYTHD);
          }
        ;
@@ -1943,6 +2294,29 @@ sp_opt_default:
        ;

 sp_proc_stmt:
+         sp_proc_stmt_statement
+       | sp_proc_stmt_return
+       | sp_proc_stmt_if
+       | sp_proc_stmt_case_simple
+       | sp_proc_stmt_case
+       | sp_labeled_control
+       | sp_proc_stmt_unlabeled
+       | sp_proc_stmt_leave
+       | sp_proc_stmt_iterate
+       | sp_proc_stmt_label
+       | sp_proc_stmt_goto
+       | sp_proc_stmt_open
+       | sp_proc_stmt_fetch
+       | sp_proc_stmt_close
+       ;
+
+sp_proc_stmt_if:
+       IF { Lex->sphead->new_cont_backpatch(NULL); }
+       sp_if END IF
+       { Lex->sphead->do_cont_backpatch(); }
+       ;
+
+sp_proc_stmt_statement:
        {
          LEX *lex= Lex;
@@ -1985,7 +2359,10 @@ sp_proc_stmt:
          }
          sp->restore_lex(YYTHD);
        }
-       | RETURN_SYM
+       ;
+
+sp_proc_stmt_return:
+       RETURN_SYM
        { Lex->sphead->reset_lex(YYTHD); }
        expr
        {
@@ -2008,17 +2385,19 @@ sp_proc_stmt:
          }
          sp->restore_lex(YYTHD);
        }
-       | IF
-         { Lex->sphead->new_cont_backpatch(NULL); }
-         sp_if END IF
-         { Lex->sphead->do_cont_backpatch(); }
-       | CASE_SYM WHEN_SYM
+       ;
+
+sp_proc_stmt_case_simple:
+       CASE_SYM WHEN_SYM
        {
          Lex->sphead->m_flags&= ~sp_head::IN_SIMPLE_CASE;
          Lex->sphead->new_cont_backpatch(NULL);
        }
        sp_case END CASE_SYM { Lex->sphead->do_cont_backpatch(); }
-       | CASE_SYM
+       ;
+
+sp_proc_stmt_case:
+       CASE_SYM
        {
          Lex->sphead->reset_lex(YYTHD);
          Lex->sphead->new_cont_backpatch(NULL);
@@ -2049,9 +2428,10 @@ sp_proc_stmt:
          Lex->spcont->pop_case_expr_id();
          Lex->sphead->do_cont_backpatch();
        }
-       | sp_labeled_control
-         {}
-       | { /* Unlabeled controls get a secret label. */
+       ;
+
+sp_proc_stmt_unlabeled:
+       { /* Unlabeled controls get a secret label. */
          LEX *lex= Lex;

          lex->spcont->push_label((char *)"", lex->sphead->instructions());
@@ -2062,7 +2442,10 @@ sp_proc_stmt:

          lex->sphead->backpatch(lex->spcont->pop_label());
        }
-       | LEAVE_SYM label_ident
+       ;
+
+sp_proc_stmt_leave:
+       LEAVE_SYM label_ident
        {
          LEX *lex= Lex;
          sp_head *sp = lex->sphead;
@@ -2091,7 +2474,10 @@ sp_proc_stmt:
            sp->add_instr(i);
          }
        }
-       | ITERATE_SYM label_ident
+       ;
+
+sp_proc_stmt_iterate:
+       ITERATE_SYM label_ident
        {
          LEX *lex= Lex;
          sp_head *sp= lex->sphead;
@@ -2119,7 +2505,10 @@ sp_proc_stmt:
            sp->add_instr(i);
          }
        }
-       | LABEL_SYM IDENT
+       ;
+
+sp_proc_stmt_label:
+       LABEL_SYM IDENT
        {
 #ifdef SP_GOTO
          LEX *lex= Lex;
@@ -2144,7 +2533,10 @@ sp_proc_stmt:
          YYABORT;
 #endif
        }
-       | GOTO_SYM IDENT
+       ;
+
+sp_proc_stmt_goto:
+       GOTO_SYM IDENT
        {
 #ifdef SP_GOTO
          LEX *lex= Lex;
@@ -2204,7 +2596,10 @@ sp_proc_stmt:
          YYABORT;
 #endif
        }
-       | OPEN_SYM ident
+       ;
+
+sp_proc_stmt_open:
+       OPEN_SYM ident
        {
          LEX *lex= Lex;
          sp_head *sp= lex->sphead;
@@ -2219,7 +2614,10 @@ sp_proc_stmt:
          i= new sp_instr_copen(sp->instructions(), lex->spcont, offset);
          sp->add_instr(i);
        }
-       | FETCH_SYM sp_opt_fetch_noise ident INTO
+       ;
+
+sp_proc_stmt_fetch:
+       FETCH_SYM sp_opt_fetch_noise ident INTO
        {
          LEX *lex= Lex;
          sp_head *sp= lex->sphead;
@@ -2236,7 +2634,10 @@ sp_proc_stmt:
        }
        sp_fetch_list
        { }
-       | CLOSE_SYM ident
+       ;
+
+sp_proc_stmt_close:
+       CLOSE_SYM ident
        {
          LEX *lex= Lex;
          sp_head *sp= lex->sphead;
@@ -2550,10 +2951,388 @@ trg_event:
        | DELETE_SYM
          { Lex->trg_chistics.event= TRG_EVENT_DELETE; }
        ;
+
+/*
+  This part of the parser contains common code for all TABLESPACE
+  commands.
+  CREATE TABLESPACE name ...
+  ALTER TABLESPACE name CHANGE DATAFILE ...
+  ALTER TABLESPACE name ADD DATAFILE ...
+  ALTER TABLESPACE name access_mode
+  CREATE LOGFILE GROUP name ...
+  ALTER LOGFILE GROUP name ADD UNDOFILE ..
+  ALTER LOGFILE GROUP name ADD REDOFILE ..
+  DROP TABLESPACE name
+  DROP LOGFILE GROUP name
+*/
+change_tablespace_access:
+         tablespace_name
+         ts_access_mode
+       ;
+
+change_tablespace_info:
+         tablespace_name
+         CHANGE ts_datafile
+         change_ts_option_list
+       ;
+
+tablespace_info:
+         tablespace_name
+         ADD ts_datafile
+         opt_logfile_group_name
+         tablespace_option_list
+       ;
+
+opt_logfile_group_name:
+         /* empty */ {}
+       | USE_SYM LOGFILE_SYM GROUP ident
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->logfile_group_name= $4.str;
+         };
+
+alter_tablespace_info:
+         tablespace_name
+         ADD ts_datafile
+         alter_tablespace_option_list
+         {
+           Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_ADD_FILE;
+         }
+       |
+         tablespace_name
+         DROP ts_datafile
+         alter_tablespace_option_list
+         {
+           Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_DROP_FILE;
+         };
+
+logfile_group_info:
+         logfile_group_name
+         add_log_file
+         logfile_group_option_list
+       ;
+
+alter_logfile_group_info:
+         logfile_group_name
+         add_log_file
+         alter_logfile_group_option_list
+       ;
+
+add_log_file:
+         ADD lg_undofile
+       | ADD lg_redofile
+       ;
+
+change_ts_option_list:
+         /* empty */ {}
+         change_ts_options
+       ;
+
+change_ts_options:
+         change_ts_option
+       | change_ts_options change_ts_option
+       | change_ts_options ',' change_ts_option
+       ;
+
+change_ts_option:
+         opt_ts_initial_size
+       | opt_ts_autoextend_size
+       | opt_ts_max_size
+       ;
+
+tablespace_option_list:
+         /* empty */ {}
+         tablespace_options
+       ;
+
+tablespace_options:
+         tablespace_option
+       | tablespace_options tablespace_option
+       | tablespace_options ',' tablespace_option
+       ;
+
+tablespace_option:
+         opt_ts_initial_size
+       | opt_ts_autoextend_size
+       | opt_ts_max_size
+       | opt_ts_extent_size
+       | opt_ts_nodegroup
+       | opt_ts_engine
+       | ts_wait
+       | opt_ts_comment
+       ;
+
+alter_tablespace_option_list:
+         /* empty */ {}
+         alter_tablespace_options
+       ;
+
+alter_tablespace_options:
+         alter_tablespace_option
+       | alter_tablespace_options alter_tablespace_option
+       | alter_tablespace_options ',' alter_tablespace_option
+       ;
+
+alter_tablespace_option:
+         opt_ts_initial_size
+       | opt_ts_autoextend_size
+       | opt_ts_max_size
+       | opt_ts_engine
+       | ts_wait
+       ;
+
+logfile_group_option_list:
+         /* empty */ {}
+         logfile_group_options
+       ;
+
+logfile_group_options:
+         logfile_group_option
+       | logfile_group_options logfile_group_option
+       | logfile_group_options ',' logfile_group_option
+       ;
+
+logfile_group_option:
+         opt_ts_initial_size
+       | opt_ts_undo_buffer_size
+       | opt_ts_redo_buffer_size
+       | opt_ts_nodegroup
+       | opt_ts_engine
+       | ts_wait
+       | opt_ts_comment
+       ;
+
+alter_logfile_group_option_list:
+         /* empty */ {}
+         alter_logfile_group_options
+       ;
+
+alter_logfile_group_options:
+         alter_logfile_group_option
+       | alter_logfile_group_options alter_logfile_group_option
+       | alter_logfile_group_options ',' alter_logfile_group_option
+       ;
+
+alter_logfile_group_option:
+         opt_ts_initial_size
+       | opt_ts_engine
+       | ts_wait
+       ;
+
+
+ts_datafile:
+         DATAFILE_SYM TEXT_STRING_sys
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->data_file_name= $2.str;
+         };
+
+lg_undofile:
+         UNDOFILE_SYM TEXT_STRING_sys
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->undo_file_name= $2.str;
+         };
+
+lg_redofile:
+         REDOFILE_SYM TEXT_STRING_sys
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->redo_file_name= $2.str;
+         };
+
+tablespace_name:
+         ident
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info= new st_alter_tablespace();
+           lex->alter_tablespace_info->tablespace_name= $1.str;
+           lex->sql_command= SQLCOM_ALTER_TABLESPACE;
+         };
+
+logfile_group_name:
+         ident
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info= new st_alter_tablespace();
+           lex->alter_tablespace_info->logfile_group_name= $1.str;
+           lex->sql_command= SQLCOM_ALTER_TABLESPACE;
+         };
+
+ts_access_mode:
+         READ_ONLY_SYM
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->ts_access_mode= TS_READ_ONLY;
+         }
+       | READ_WRITE_SYM
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->ts_access_mode= TS_READ_WRITE;
+         }
+       | NOT_SYM ACCESSIBLE_SYM
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->ts_access_mode= TS_NOT_ACCESSIBLE;
+         };
+
+opt_ts_initial_size:
+         INITIAL_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->initial_size= $3;
+         };
+
+opt_ts_autoextend_size:
+         AUTOEXTEND_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->autoextend_size= $3;
+         };
+
+opt_ts_max_size:
+         MAX_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->max_size= $3;
+         };
+
+opt_ts_extent_size:
+         EXTENT_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->extent_size= $3;
+         };
+
+opt_ts_undo_buffer_size:
+         UNDO_BUFFER_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->undo_buffer_size= $3;
+         };
+
+opt_ts_redo_buffer_size:
+         REDO_BUFFER_SIZE_SYM opt_equal size_number
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->redo_buffer_size= $3;
+         };
+
+opt_ts_nodegroup:
+         NODEGROUP_SYM opt_equal ulong_num
+         {
+           LEX *lex= Lex;
+           if (lex->alter_tablespace_info->nodegroup_id != UNDEF_NODEGROUP)
+           {
+             my_error(ER_FILEGROUP_OPTION_ONLY_ONCE,MYF(0),"NODEGROUP");
+             YYABORT;
+           }
+           lex->alter_tablespace_info->nodegroup_id= $3;
+         };
+
+opt_ts_comment:
+         COMMENT_SYM opt_equal TEXT_STRING_sys
+         {
+           LEX *lex= Lex;
+           if (lex->alter_tablespace_info->ts_comment != NULL)
+           {
+             my_error(ER_FILEGROUP_OPTION_ONLY_ONCE,MYF(0),"COMMENT");
+             YYABORT;
+           }
+           lex->alter_tablespace_info->ts_comment= $3.str;
+         };
+
+opt_ts_engine:
+         opt_storage ENGINE_SYM opt_equal storage_engines
+         {
+           LEX *lex= Lex;
+           if (lex->alter_tablespace_info->storage_engine != DB_TYPE_UNKNOWN)
+           {
+             my_error(ER_FILEGROUP_OPTION_ONLY_ONCE,MYF(0),
+                      "STORAGE ENGINE");
+             YYABORT;
+           }
+           lex->alter_tablespace_info->storage_engine= $4->db_type;
+         };
+
+opt_ts_wait:
+         /* empty */
+       | ts_wait
+       ;
+
+ts_wait:
+         WAIT_SYM
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->wait_until_completed= TRUE;
+         }
+       | NO_WAIT_SYM
+         {
+           LEX *lex= Lex;
+           if (!(lex->alter_tablespace_info->wait_until_completed))
+           {
+             my_error(ER_FILEGROUP_OPTION_ONLY_ONCE,MYF(0),"NO_WAIT");
+             YYABORT;
+           }
+           lex->alter_tablespace_info->wait_until_completed= FALSE;
+         };
+
+size_number:
+         ulong_num { $$= $1;}
+       | IDENT
+         {
+           ulonglong number, test_number;
+           uint text_shift_number= 0;
+           longlong prefix_number;
+           char *start_ptr= $1.str;
+           uint str_len= strlen(start_ptr);
+           char *end_ptr= start_ptr + str_len;
+           int error;
+           prefix_number= my_strtoll10(start_ptr, &end_ptr, &error);
+           if ((start_ptr + str_len - 1) == end_ptr)
+           {
+             switch (end_ptr[0])
+             {
+               case 'g':
+               case 'G':
+                 text_shift_number+=10;
+               case 'm':
+               case 'M':
+                 text_shift_number+=10;
+               case 'k':
+               case 'K':
+                 text_shift_number+=10;
+                 break;
+               default:
+               {
+                 my_error(ER_WRONG_SIZE_NUMBER, MYF(0));
+                 YYABORT;
+               }
+             }
+             if (prefix_number >> 31)
+             {
+               my_error(ER_SIZE_OVERFLOW_ERROR, MYF(0));
+               YYABORT;
+             }
+             number= prefix_number << text_shift_number;
+           }
+           else
+           {
+             my_error(ER_WRONG_SIZE_NUMBER, MYF(0));
+             YYABORT;
+           }
+           $$= number;
+         }
+       ;
+
+/*
+  End tablespace part
+*/
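
The size_number rule above turns an optional k/m/g suffix into a power-of-1024 multiplier by letting the switch cases fall through, accumulating 10 bits of shift per magnitude step, and rejecting prefixes that would overflow once shifted. The same logic as a standalone sketch (apply_size_suffix is a hypothetical helper, not part of the patch):

    #include <stdint.h>

    /* 'k'/'K' => *2^10, 'm'/'M' => *2^20, 'g'/'G' => *2^30 */
    static uint64_t apply_size_suffix(uint64_t n, char suffix)
    {
      unsigned shift= 0;
      switch (suffix) {
      case 'g': case 'G': shift+= 10; /* fall through */
      case 'm': case 'M': shift+= 10; /* fall through */
      case 'k': case 'K': shift+= 10; break;
      default:  return n;             /* no recognized suffix */
      }
      return n << shift;              /* caller must reject n >= 2^31 first */
    }
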
 create2:
        '(' create2a {}
-       | opt_create_table_options create3 {}
+       | opt_create_table_options
+         opt_partitioning {}
+         create3 {}
        | LIKE table_ident
          {
            LEX *lex=Lex;
@@ -2569,8 +3348,12 @@ create2:
        ;

 create2a:
-       field_list ')' opt_create_table_options create3 {}
-       | create_select ')' { Select->set_braces(1);} union_opt {}
+         field_list ')' opt_create_table_options
+         opt_partitioning {}
+         create3 {}
+       | opt_partitioning {}
+         create_select ')'
+         { Select->set_braces(1);} union_opt {}
        ;

 create3:
@@ -2581,6 +3364,569 @@
        { Select->set_braces(1);} union_opt {}
        ;

+/*
+  This part of the parser is about handling of the partition information.
+
+  Its first version was written by Mikael Ronström with lots of answers to
+  questions provided by Antony Curtis.
+
+  The partition grammar can be called from three places.
+  1) CREATE TABLE ... PARTITION ..
+  2) ALTER TABLE table_name PARTITION ...
+  3) PARTITION ...
+
+  The first place is called when a new table is created from a MySQL client.
+  The second place is called when a table is altered with the ALTER TABLE
+  command from a MySQL client.
+  The third place is called when opening an frm file and finding partition
+  info in the .frm file. It is necessary to avoid allowing PARTITION to be
+  an allowed entry point for SQL client queries. This is arranged by setting
+  some state variables before arriving here.
+
+  To be able to handle errors, we only set the error code in this code
+  and handle the error condition in the function that called the parser.
+  This is necessary to ensure we can also handle errors when calling the
+  parser from the openfrm function.
+*/
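
The three entry points correspond to input of the following shapes (illustrative statements written as C string literals; they are assumptions for the sake of the example, not text from the patch):

    /* 1) Partition clause inside CREATE TABLE */
    const char *create_stmt=
      "CREATE TABLE t1 (a INT) PARTITION BY HASH (a) PARTITIONS 4";

    /* 2) Partition clause inside ALTER TABLE */
    const char *alter_stmt=
      "ALTER TABLE t1 PARTITION BY KEY (a) PARTITIONS 8";

    /* 3) Bare partition clause as stored in the .frm file, replayed
          through partition_entry when the table is opened */
    const char *frm_clause= "PARTITION BY HASH (a) PARTITIONS 4";
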
+opt_partitioning:
+         /* empty */ {}
+       | partitioning
+       ;
+
+partitioning:
+         PARTITION_SYM
+         {
+           LEX *lex= Lex;
+           lex->part_info= new partition_info();
+           if (!lex->part_info)
+           {
+             mem_alloc_error(sizeof(partition_info));
+             YYABORT;
+           }
+           if (lex->sql_command == SQLCOM_ALTER_TABLE)
+           {
+             lex->alter_info.flags|= ALTER_PARTITION;
+           }
+         }
+         partition
+       ;
+
+partition_entry:
+         PARTITION_SYM
+         {
+           LEX *lex= Lex;
+           if (!lex->part_info)
+           {
+             yyerror(ER(ER_PARTITION_ENTRY_ERROR));
+             YYABORT;
+           }
+           /*
+             We enter here when opening the frm file to translate
+             partition info string into part_info data structure.
+           */
+         }
+         partition {}
+       ;
+
+partition:
+         BY part_type_def opt_no_parts {} opt_sub_part {} part_defs
+       ;
+
+part_type_def:
+         opt_linear KEY_SYM '(' part_field_list ')'
+         {
+           LEX *lex= Lex;
+           lex->part_info->list_of_part_fields= TRUE;
+           lex->part_info->part_type= HASH_PARTITION;
+         }
+       | opt_linear HASH_SYM
+         { Lex->part_info->part_type= HASH_PARTITION; }
+         part_func {}
+       | RANGE_SYM
+         { Lex->part_info->part_type= RANGE_PARTITION; }
+         part_func {}
+       | LIST_SYM
+         { Lex->part_info->part_type= LIST_PARTITION; }
+         part_func {}
+       ;
+
+opt_linear:
+         /* empty */ {}
+       | LINEAR_SYM
+         { Lex->part_info->linear_hash_ind= TRUE;}
+       ;
+
+part_field_list:
+         /* empty */ {}
+       | part_field_item_list {}
+       ;
+
+part_field_item_list:
+         part_field_item {}
+       | part_field_item_list ',' part_field_item {}
+       ;
+
+part_field_item:
+         ident
+         {
+           if (Lex->part_info->part_field_list.push_back($1.str))
+           {
+             mem_alloc_error(1);
+             YYABORT;
+           }
+         }
+       ;
+
+part_func:
+         '(' remember_name part_func_expr remember_end ')'
+         {
+           LEX *lex= Lex;
+           uint expr_len= (uint)($4 - $2) - 1;
+           lex->part_info->list_of_part_fields= FALSE;
+           lex->part_info->part_expr= $3;
+           lex->part_info->part_func_string= (char* ) sql_memdup($2+1, expr_len);
+           lex->part_info->part_func_len= expr_len;
+         }
+       ;
+
+sub_part_func:
+         '(' remember_name part_func_expr remember_end ')'
+         {
+           LEX *lex= Lex;
+           uint expr_len= (uint)($4 - $2) - 1;
+           lex->part_info->list_of_subpart_fields= FALSE;
+           lex->part_info->subpart_expr= $3;
+           lex->part_info->subpart_func_string= (char* ) sql_memdup($2+1, expr_len);
+           lex->part_info->subpart_func_len= expr_len;
+         }
+       ;
+
+
+opt_no_parts:
+         /* empty */ {}
+       | PARTITIONS_SYM ulong_num
+         {
+           uint no_parts= $2;
+           LEX *lex= Lex;
+           if (no_parts == 0)
+           {
+             my_error(ER_NO_PARTS_ERROR, MYF(0), "partitions");
+             YYABORT;
+           }
+
+           lex->part_info->no_parts= no_parts;
+           lex->part_info->use_default_no_partitions= FALSE;
+         }
+       ;
+
+opt_sub_part:
+         /* empty */ {}
+       | SUBPARTITION_SYM BY opt_linear HASH_SYM sub_part_func
+         { Lex->part_info->subpart_type= HASH_PARTITION; }
+         opt_no_subparts {}
+       | SUBPARTITION_SYM BY opt_linear KEY_SYM
+         '(' sub_part_field_list ')'
+         {
+           LEX *lex= Lex;
+           lex->part_info->subpart_type= HASH_PARTITION;
+           lex->part_info->list_of_subpart_fields= TRUE;
+         }
+         opt_no_subparts {}
+       ;
+
+sub_part_field_list:
+         sub_part_field_item {}
+       | sub_part_field_list ',' sub_part_field_item {}
+       ;
+
+sub_part_field_item:
+         ident
+         {
+           if (Lex->part_info->subpart_field_list.push_back($1.str))
+           {
+             mem_alloc_error(1);
+             YYABORT;
+           }
+         }
+       ;
+
+part_func_expr:
+         bit_expr
+         {
+           LEX *lex= Lex;
+           bool not_corr_func;
+           not_corr_func= !lex->safe_to_cache_query;
+           lex->safe_to_cache_query= 1;
+           if (not_corr_func)
+           {
+             yyerror(ER(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR));
+             YYABORT;
+           }
+           $$=$1;
+         }
+       ;
+
+opt_no_subparts:
+         /* empty */ {}
+       | SUBPARTITIONS_SYM ulong_num
+         {
+           uint no_parts= $2;
+           LEX *lex= Lex;
+           if (no_parts == 0)
+           {
+             my_error(ER_NO_PARTS_ERROR, MYF(0), "subpartitions");
+             YYABORT;
+           }
+           lex->part_info->no_subparts= no_parts;
+           lex->part_info->use_default_no_subpartitions= FALSE;
+         }
+       ;
+
+part_defs:
+         /* empty */
+         {}
+       | '(' part_def_list ')'
+         {
+           LEX *lex= Lex;
+           partition_info *part_info= lex->part_info;
+           uint count_curr_parts= part_info->partitions.elements;
+           if (part_info->no_parts != 0)
+           {
+             if (part_info->no_parts !=
+                 count_curr_parts)
+             {
+               yyerror(ER(ER_PARTITION_WRONG_NO_PART_ERROR));
+               YYABORT;
+             }
+           }
+           else if (count_curr_parts > 0)
+           {
+             part_info->no_parts= count_curr_parts;
+           }
+           part_info->count_curr_subparts= 0;
+         }
+       ;
+
+part_def_list:
+         part_definition {}
+       | part_def_list ',' part_definition {}
+       ;
+
+part_definition:
+         PARTITION_SYM
+         {
+           LEX *lex= Lex;
+           partition_info *part_info= lex->part_info;
+           partition_element *p_elem= new partition_element();
+           uint part_id= part_info->partitions.elements +
+                         part_info->temp_partitions.elements;
+           enum partition_state part_state;
+
+           if (part_info->part_state)
+             part_state= (enum partition_state)part_info->part_state[part_id];
+           else
+             part_state= PART_NORMAL;
+           switch (part_state)
+           {
+             case PART_TO_BE_DROPPED:
+               /*
+                 This part is currently removed so we keep it in a
+                 temporary list for REPAIR TABLE to be able to handle
+                 failures during drop partition process.
+               */
+             case PART_TO_BE_ADDED:
+               /*
+                 This part is currently being added so we keep it in a
+                 temporary list for REPAIR TABLE to be able to handle
+                 failures during add partition process.
+               */
+               if (!p_elem || part_info->temp_partitions.push_back(p_elem))
+               {
+                 mem_alloc_error(sizeof(partition_element));
+                 YYABORT;
+               }
+               break;
+             case PART_IS_ADDED:
+               /*
+                 Part has been added and is now a normal partition
+               */
+             case PART_TO_BE_REORGED:
+               /*
+                 This part is currently reorganised, it is still however
+                 used so we keep it in the list of partitions. We do
+                 however need the state to be able to handle REPAIR TABLE
+                 after failures in the reorganisation process.
+               */
+             case PART_REORGED_DROPPED:
+               /*
+                 This part is currently reorganised as part of a
+                 COALESCE PARTITION and it will be dropped without a new
+                 replacement partition after completing the reorganisation.
+               */
+             case PART_CHANGED:
+               /*
+                 This part is currently split or merged as part of ADD
+                 PARTITION for a hash partition or as part of COALESCE
+                 PARTITION for a hash partitioned table.
+               */
+             case PART_IS_CHANGED:
+               /*
+                 This part has been split or merged as part of ADD
+                 PARTITION for a hash partition or as part of COALESCE
+                 PARTITION for a hash partitioned table.
+               */
+             case PART_NORMAL:
+               if (!p_elem || part_info->partitions.push_back(p_elem))
+               {
+                 mem_alloc_error(sizeof(partition_element));
+                 YYABORT;
+               }
+               break;
+             default:
+               mem_alloc_error((part_id * 1000) + part_state);
+               YYABORT;
+           }
+           p_elem->part_state= part_state;
+           part_info->curr_part_elem= p_elem;
+           part_info->current_partition= p_elem;
+           part_info->use_default_partitions= FALSE;
+           part_info->use_default_no_partitions= FALSE;
+         }
+         part_name {}
+         opt_part_values {}
+         opt_part_options {}
+         opt_sub_partition {}
+       ;
+
+part_name:
+         ident_or_text
+         {
+           LEX *lex= Lex;
+           partition_info *part_info= lex->part_info;
+           partition_element *p_elem= part_info->curr_part_elem;
+           p_elem->partition_name= $1.str;
+         }
+       ;
+
+opt_part_values:
+         /* empty */
+         {
+           LEX *lex= Lex;
+           if (!is_partition_management(lex))
+           {
+             if (lex->part_info->part_type == RANGE_PARTITION)
+             {
+               my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+                        "RANGE", "LESS THAN");
+               YYABORT;
+             }
+             if (lex->part_info->part_type == LIST_PARTITION)
+             {
+               my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
+                        "LIST", "IN");
+               YYABORT;
+             }
+           }
+         }
+       | VALUES LESS_SYM THAN_SYM part_func_max
+         {
+           LEX *lex= Lex;
+           if (!is_partition_management(lex))
+           {
+             if (Lex->part_info->part_type != RANGE_PARTITION)
+             {
+               my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+                        "RANGE", "LESS THAN");
+               YYABORT;
+             }
+           }
+         }
+       | VALUES IN_SYM '(' part_list_func ')'
+         {
+           LEX *lex= Lex;
+           if (!is_partition_management(lex))
+           {
+             if (Lex->part_info->part_type != LIST_PARTITION)
+             {
+               my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
+                        "LIST", "IN");
+               YYABORT;
+             }
+           }
+         }
+       ;
+
+part_func_max:
+         MAX_VALUE_SYM
+         {
+           LEX *lex= Lex;
+           if (lex->part_info->defined_max_value)
+           {
+             yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+             YYABORT;
+           }
+           lex->part_info->defined_max_value= TRUE;
+           lex->part_info->curr_part_elem->range_value= LONGLONG_MAX;
+         }
+       | part_range_func
+         {
+           if (Lex->part_info->defined_max_value)
+           {
+             yyerror(ER(ER_PARTITION_MAXVALUE_ERROR));
+             YYABORT;
+           }
+         }
+       ;
+
+part_range_func:
+         '(' part_bit_expr ')'
+         {
+           Lex->part_info->curr_part_elem->range_value= $2;
+         }
+       ;
+
+part_list_func:
+         part_list_item {}
+       | part_list_func ',' part_list_item {}
+       ;
+
+part_list_item:
+         part_bit_expr
+         {
+           longlong *value_ptr;
+           if (!(value_ptr= (longlong*)sql_alloc(sizeof(longlong))) ||
+               ((*value_ptr= $1, FALSE) ||
+                Lex->part_info->curr_part_elem->list_val_list.push_back(value_ptr)))
+           {
+             mem_alloc_error(sizeof(longlong));
+             YYABORT;
+           }
+         }
+       ;
+
+part_bit_expr:
+         bit_expr
+         {
+           Item *part_expr= $1;
+           bool not_corr_func;
+           LEX *lex= Lex;
+           THD *thd= YYTHD;
+           longlong item_value;
+           Name_resolution_context *context= &lex->current_select->context;
+           TABLE_LIST *save_list= context->table_list;
+           const char *save_where= thd->where;
+
+           context->table_list= 0;
+           thd->where= "partition function";
+           if (part_expr->fix_fields(YYTHD, (Item**)0) ||
+               ((context->table_list= save_list), FALSE) ||
+               (!part_expr->const_item()) ||
+               (!lex->safe_to_cache_query))
+           {
+             yyerror(ER(ER_NO_CONST_EXPR_IN_RANGE_OR_LIST_ERROR));
+             YYABORT;
+           }
+           thd->where= save_where;
+           if (part_expr->result_type() != INT_RESULT)
+           {
+             yyerror(ER(ER_INCONSISTENT_TYPE_OF_FUNCTIONS_ERROR));
+             YYABORT;
+           }
+           item_value= part_expr->val_int();
+           $$= item_value;
+         }
+       ;
+
+opt_sub_partition:
+         /* empty */ {}
+       | '(' sub_part_list ')'
+         {
+           LEX *lex= Lex;
+           partition_info *part_info= lex->part_info;
+           if (part_info->no_subparts != 0)
+           {
+             if (part_info->no_subparts !=
+                 part_info->count_curr_subparts)
+             {
+               yyerror(ER(ER_PARTITION_WRONG_NO_SUBPART_ERROR));
+               YYABORT;
+             }
+           }
+           else if (part_info->count_curr_subparts > 0)
+           {
+             part_info->no_subparts= part_info->count_curr_subparts;
+           }
+           part_info->count_curr_subparts= 0;
+         }
+       ;
+
+sub_part_list:
+         sub_part_definition {}
+       | sub_part_list ',' sub_part_definition {}
+       ;
+
+sub_part_definition:
+         SUBPARTITION_SYM
+         {
+           LEX *lex= Lex;
+           partition_info *part_info= lex->part_info;
+           partition_element *p_elem= new partition_element();
+           if (!p_elem ||
+               part_info->current_partition->subpartitions.push_back(p_elem))
+           {
+             mem_alloc_error(sizeof(partition_element));
+             YYABORT;
+           }
+           part_info->curr_part_elem= p_elem;
+           part_info->use_default_subpartitions= FALSE;
+           part_info->use_default_no_subpartitions= FALSE;
+           part_info->count_curr_subparts++;
+         }
+         sub_name opt_part_options {}
+       ;
+
+sub_name:
+         ident_or_text
+         { Lex->part_info->curr_part_elem->partition_name= $1.str; }
+       ;
+
+opt_part_options:
+         /* empty */ {}
+       | opt_part_option_list {}
+       ;
+
+opt_part_option_list:
+         opt_part_option_list opt_part_option {}
+       | opt_part_option {}
+       ;
+
+opt_part_option:
+         TABLESPACE opt_equal ident_or_text
+         { Lex->part_info->curr_part_elem->tablespace_name= $3.str; }
+       | opt_storage ENGINE_SYM opt_equal storage_engines
+         {
+           LEX *lex= Lex;
+           lex->part_info->curr_part_elem->engine_type= $4;
+           lex->part_info->default_engine_type= $4;
+         }
+       | NODEGROUP_SYM opt_equal ulong_num
+         { Lex->part_info->curr_part_elem->nodegroup_id= $3; }
+       | MAX_ROWS opt_equal ulonglong_num
+         { Lex->part_info->curr_part_elem->part_max_rows= $3; }
+       | MIN_ROWS opt_equal ulonglong_num
+         { Lex->part_info->curr_part_elem->part_min_rows= $3; }
+       | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+         { Lex->part_info->curr_part_elem->data_file_name= $4.str; }
+       | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys
+         { Lex->part_info->curr_part_elem->index_file_name= $4.str; }
+       | COMMENT_SYM opt_equal TEXT_STRING_sys
+         { Lex->part_info->curr_part_elem->part_comment= $3.str; }
+       ;
+
+/*
+  End of partition parser part
+*/
+
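The part_bit_expr rule above uses the server's standard recipe for folding a parsed expression to a constant integer: fix_fields() to resolve it, const_item() to reject row-dependent input, result_type() to insist on an integer, and val_int() to evaluate. Condensed into a sketch (eval_const_int is a hypothetical helper; error reporting is left to the caller):

    /* Fold a parser Item to a constant integer, as part_bit_expr does. */
    static bool eval_const_int(THD *thd, Item *expr, longlong *out)
    {
      if (expr->fix_fields(thd, (Item **) 0) || /* resolve names and types */
          !expr->const_item() ||                /* must not depend on rows */
          expr->result_type() != INT_RESULT)    /* must be integral */
        return true;                            /* caller raises the error */
      *out= expr->val_int();
      return false;
    }
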
 create_select:
        SELECT_SYM
        {
@@ -2658,7 +4004,13 @@ create_table_options:
 create_table_option:
        ENGINE_SYM opt_equal storage_engines    { Lex->create_info.db_type= $3; Lex->create_info.used_fields|= HA_CREATE_USED_ENGINE; }
-       | TYPE_SYM opt_equal storage_engines    { Lex->create_info.db_type= $3; WARN_DEPRECATED("TYPE=storage_engine","ENGINE=storage_engine"); Lex->create_info.used_fields|= HA_CREATE_USED_ENGINE; }
+       | TYPE_SYM opt_equal storage_engines
+         {
+           Lex->create_info.db_type= $3;
+           WARN_DEPRECATED(yythd, "5.2", "TYPE=storage_engine",
+                           "'ENGINE=storage_engine'");
+           Lex->create_info.used_fields|= HA_CREATE_USED_ENGINE;
+         }
        | MAX_ROWS opt_equal ulonglong_num      { Lex->create_info.max_rows= $3; Lex->create_info.used_fields|= HA_CREATE_USED_MAX_ROWS;}
        | MIN_ROWS opt_equal ulonglong_num      { Lex->create_info.min_rows= $3; Lex->create_info.used_fields|= HA_CREATE_USED_MIN_ROWS;}
        | AVG_ROW_LENGTH opt_equal ulong_num    { Lex->create_info.avg_row_length=$3; Lex->create_info.used_fields|= HA_CREATE_USED_AVG_ROW_LENGTH;}
@@ -2689,21 +4041,6 @@ create_table_option:
        | CHECKSUM_SYM opt_equal ulong_num      { Lex->create_info.table_options|= $3 ? HA_OPTION_CHECKSUM : HA_OPTION_NO_CHECKSUM; Lex->create_info.used_fields|= HA_CREATE_USED_CHECKSUM; }
        | DELAY_KEY_WRITE_SYM opt_equal ulong_num { Lex->create_info.table_options|= $3 ? HA_OPTION_DELAY_KEY_WRITE : HA_OPTION_NO_DELAY_KEY_WRITE; Lex->create_info.used_fields|= HA_CREATE_USED_DELAY_KEY_WRITE; }
        | ROW_FORMAT_SYM opt_equal row_types    { Lex->create_info.row_type= $3; Lex->create_info.used_fields|= HA_CREATE_USED_ROW_FORMAT; }
-       | RAID_TYPE opt_equal raid_types
-         {
-           my_error(ER_WARN_DEPRECATED_SYNTAX, MYF(0), "RAID_TYPE", "PARTITION");
-           YYABORT;
-         }
-       | RAID_CHUNKS opt_equal ulong_num
-         {
-           my_error(ER_WARN_DEPRECATED_SYNTAX, MYF(0), "RAID_CHUNKS", "PARTITION");
-           YYABORT;
-         }
-       | RAID_CHUNKSIZE opt_equal ulong_num
-         {
-           my_error(ER_WARN_DEPRECATED_SYNTAX, MYF(0), "RAID_CHUNKSIZE", "PARTITION");
-           YYABORT;
-         }
        | UNION_SYM opt_equal '(' table_list ')'
          {
            /* Move the union list to the merge_list */
@@ -2724,6 +4061,9 @@ create_table_option:
        | INSERT_METHOD opt_equal merge_insert_types   { Lex->create_info.merge_insert_method= $3; Lex->create_info.used_fields|= HA_CREATE_USED_INSERT_METHOD;}
        | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys { Lex->create_info.data_file_name= $4.str; Lex->create_info.used_fields|= HA_CREATE_USED_DATADIR; }
        | INDEX_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys { Lex->create_info.index_file_name= $4.str; Lex->create_info.used_fields|= HA_CREATE_USED_INDEXDIR; }
+       | TABLESPACE ident {Lex->create_info.tablespace= $2.str;}
+       | STORAGE_SYM DISK_SYM {Lex->create_info.store_on_disk= TRUE;}
+       | STORAGE_SYM MEMORY_SYM {Lex->create_info.store_on_disk= FALSE;}
        | CONNECTION_SYM opt_equal TEXT_STRING_sys { Lex->create_info.connect_string.str= $3.str; Lex->create_info.connect_string.length= $3.length; Lex->create_info.used_fields|= HA_CREATE_USED_CONNECTION; }
        ;
@@ -2763,8 +4103,10 @@ default_collation:
 storage_engines:
        ident_or_text
        {
-         $$ = ha_resolve_by_name($1.str,$1.length);
-         if ($$ == DB_TYPE_UNKNOWN) {
+         $$ = ha_resolve_by_name(YYTHD, &$1);
+         if ($$ == NULL &&
+             test(YYTHD->variables.sql_mode & MODE_NO_ENGINE_SUBSTITUTION))
+         {
            my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), $1.str);
            YYABORT;
          }
@@ -2778,11 +4120,6 @@ row_types:
        | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; }
        | COMPACT_SYM   { $$= ROW_TYPE_COMPACT; };

-raid_types:
-       RAID_STRIPED_SYM { $$= RAID_TYPE_0; }
-       | RAID_0_SYM     { $$= RAID_TYPE_0; }
-       | ulong_num      { $$=$1;};
-
 merge_insert_types:
        NO_SYM           { $$= MERGE_INSERT_DISABLED; }
        | FIRST_SYM      { $$= MERGE_INSERT_TO_FIRST; }
@@ -2821,10 +4158,15 @@ column_def:
        ;

 key_def:
-       key_type opt_ident key_alg '(' key_list ')'
+       key_type opt_ident key_alg '(' key_list ')' opt_fulltext_parser
          {
            LEX *lex=Lex;
-           lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list));
+           if ($1 != Key::FULLTEXT && $7)
+           {
+             yyerror(ER(ER_SYNTAX_ERROR));
+             YYABORT;
+           }
+           lex->key_list.push_back(new Key($1,$2, $3, 0, lex->col_list, $7));
            lex->col_list.empty();              /* Alloced by sql_alloc */
          }
        | opt_constraint constraint_key_type opt_ident key_alg '(' key_list ')'
@@ -2859,6 +4201,20 @@ key_def:
          }
        ;

+opt_fulltext_parser:
+         /* empty */ { $$= (LEX_STRING *)0; }
+       | WITH PARSER_SYM IDENT_sys
+         {
+           if (plugin_is_ready(&$3, MYSQL_FTPARSER_PLUGIN))
+             $$= (LEX_STRING *)sql_memdup(&$3, sizeof(LEX_STRING));
+           else
+           {
+             my_error(ER_FUNCTION_NOT_DEFINED, MYF(0), $3.str);
+             YYABORT;
+           }
+         }
+       ;
+
 opt_check_constraint:
        /* empty */
        | check_constraint
@@ -3394,13 +4750,13 @@ alter:
          lex->select_lex.init_order();
          lex->select_lex.db=lex->name=0;
          bzero((char*) &lex->create_info,sizeof(lex->create_info));
-         lex->create_info.db_type= DB_TYPE_DEFAULT;
+         lex->create_info.db_type= (handlerton*) &default_hton;
          lex->create_info.default_table_charset= NULL;
          lex->create_info.row_type= ROW_TYPE_NOT_USED;
          lex->alter_info.reset();
          lex->alter_info.flags= 0;
        }
-       alter_list
+       alter_commands
        {}
       | ALTER DATABASE ident_or_empty
         {
@@ -3461,17 +4817,280 @@ alter:
         }
         view_list_opt AS view_select view_check_option
         {}
+      | ALTER EVENT_SYM sp_name
+        /*
+          BE CAREFUL when you add a new rule here: update the block where
+          YYTHD->client_capabilities is set back to its original value.
+        */
+        {
+          LEX *lex=Lex;
+          Event_timed *et;
+
+          if (lex->et)
+          {
+            /*
+              Recursive events are not possible because recursive SPs
+              are also not possible. lex->sp_head is not stacked.
+            */
+            my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "EVENT");
+            YYABORT;
+          }
+          lex->spname= 0; // defensive programming
+
+          if (!(et= new Event_timed())) // implicitly calls Event_timed::init()
+            YYABORT;
+          lex->et = et;
+
+          if (!lex->et_compile_phase)
+          {
+            et->init_definer(YYTHD);
+            et->init_name(YYTHD, $3);
+          }
+
+          /*
+            We have to turn off CLIENT_MULTI_QUERIES while parsing a
+            stored procedure, otherwise yylex will chop it into pieces
+            at each ';'.
+          */
+          $<ulong_num>$= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES;
+          YYTHD->client_capabilities &= ~CLIENT_MULTI_QUERIES;
+        }
+        ev_alter_on_schedule_completion
+        opt_ev_rename_to
+        opt_ev_status
+        opt_ev_comment
+        opt_ev_sql_stmt
+        {
+          /*
+            Restore the flag if it was cleared above.
+            $1 - ALTER
+            $2 - EVENT_SYM
+            $3 - sp_name
+            $4 - the block above
+          */
+          YYTHD->client_capabilities |= $<ulong_num>4;
+
+          /*
+            sql_command is set here because some rules in ev_sql_stmt
+            can overwrite it
+          */
+          if (!($5 || $6 || $7 || $8 || $9))
+          {
+            yyerror(ER(ER_SYNTAX_ERROR));
+            YYABORT;
+          }
+          Lex->sql_command= SQLCOM_ALTER_EVENT;
+        }
+      | ALTER TABLESPACE alter_tablespace_info
+        {
+          LEX *lex= Lex;
+          lex->alter_tablespace_info->ts_cmd_type= ALTER_TABLESPACE;
+        }
+      | ALTER LOGFILE_SYM GROUP alter_logfile_group_info
+        {
+          LEX *lex= Lex;
+          lex->alter_tablespace_info->ts_cmd_type= ALTER_LOGFILE_GROUP;
+        }
+      | ALTER TABLESPACE change_tablespace_info
+        {
+          LEX *lex= Lex;
+          lex->alter_tablespace_info->ts_cmd_type= CHANGE_FILE_TABLESPACE;
+        }
+      | ALTER TABLESPACE change_tablespace_access
+        {
+          LEX *lex= Lex;
+          lex->alter_tablespace_info->ts_cmd_type= ALTER_ACCESS_MODE_TABLESPACE;
+        }
        ;

+ev_alter_on_schedule_completion: /* empty */ { $$= 0;}
+       | ON SCHEDULE_SYM ev_schedule_time { $$= 1; }
+       | ev_on_completion { $$= 1; }
+       | ON SCHEDULE_SYM ev_schedule_time ev_on_completion { $$= 1; }
+       ;
+
+opt_ev_rename_to: /* empty */ { $$= 0;}
+       | RENAME TO_SYM sp_name
+         {
+           LEX *lex=Lex;
+           lex->spname= $3; // use lex's spname to hold the new name;
+                            // the original name is in the Event_timed object
+           $$= 1;
+         }
+       ;
+
+opt_ev_sql_stmt: /* empty */ { $$= 0;}
+       | DO_SYM ev_sql_stmt { $$= 1; }
+       ;
+
+
 ident_or_empty:
        /* empty */  { $$= 0; }
        | ident      { $$= $1.str; };

-alter_list:
+alter_commands:
        | DISCARD TABLESPACE { Lex->alter_info.tablespace_op= DISCARD_TABLESPACE; }
        | IMPORT TABLESPACE { Lex->alter_info.tablespace_op= IMPORT_TABLESPACE; }
-       | alter_list_item
-       | alter_list ',' alter_list_item;
+       | alter_list
+         opt_partitioning
+       | partitioning
+/*
+  This part was added for release 5.1 by Mikael Ronström.
+  From here we insert a number of commands to manage the partitions of a
+  partitioned table such as adding partitions, dropping partitions,
+  reorganising partitions in various manners. In future releases the list
+  will be longer and also include moving partitions to a
+  new table and so forth.
+*/
+       | add_partition_rule
+       | DROP PARTITION_SYM alt_part_name_list
+         {
+           Lex->alter_info.flags|= ALTER_DROP_PARTITION;
+         }
+       | REBUILD_SYM PARTITION_SYM opt_no_write_to_binlog
+         all_or_alt_part_name_list
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_REBUILD_PARTITION;
+           lex->no_write_to_binlog= $3;
+         }
+       | OPTIMIZE PARTITION_SYM opt_no_write_to_binlog
+         all_or_alt_part_name_list
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_OPTIMIZE_PARTITION;
+           lex->no_write_to_binlog= $3;
+           lex->check_opt.init();
+         }
+         opt_no_write_to_binlog opt_mi_check_type
+       | ANALYZE_SYM PARTITION_SYM opt_no_write_to_binlog
+         all_or_alt_part_name_list
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_ANALYZE_PARTITION;
+           lex->no_write_to_binlog= $3;
+           lex->check_opt.init();
+         }
+         opt_mi_check_type
+       | CHECK_SYM PARTITION_SYM all_or_alt_part_name_list
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_CHECK_PARTITION;
+           lex->check_opt.init();
+         }
+         opt_mi_check_type
+       | REPAIR PARTITION_SYM opt_no_write_to_binlog
+         all_or_alt_part_name_list
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_REPAIR_PARTITION;
+           lex->no_write_to_binlog= $3;
+           lex->check_opt.init();
+         }
+         opt_mi_repair_type
+       | COALESCE PARTITION_SYM opt_no_write_to_binlog ulong_num
+         {
+           LEX *lex= Lex;
+           lex->alter_info.flags|= ALTER_COALESCE_PARTITION;
+           lex->no_write_to_binlog= $3;
+           lex->alter_info.no_parts= $4;
+         }
+       | reorg_partition_rule
+       ;
+
+all_or_alt_part_name_list:
+       | ALL
+         {
+           Lex->alter_info.flags|= ALTER_ALL_PARTITION;
+         }
+       | alt_part_name_list
+       ;
+
+add_partition_rule:
+         ADD PARTITION_SYM opt_no_write_to_binlog
+         {
+           LEX *lex= Lex;
+           lex->part_info= new partition_info();
+           if (!lex->part_info)
+           {
+             mem_alloc_error(sizeof(partition_info));
+             YYABORT;
+           }
+           lex->alter_info.flags|= ALTER_ADD_PARTITION;
+           lex->no_write_to_binlog= $3;
+         }
+         add_part_extra
+         {}
+       ;
+
+add_part_extra:
+       | '(' part_def_list ')'
+         {
+           LEX *lex= Lex;
+           lex->part_info->no_parts= lex->part_info->partitions.elements;
+         }
+       | PARTITIONS_SYM ulong_num
+         {
+           LEX *lex= Lex;
+           lex->part_info->no_parts= $2;
+         }
+       ;
+
+reorg_partition_rule:
+         REORGANIZE_SYM PARTITION_SYM opt_no_write_to_binlog
+         {
+           LEX *lex= Lex;
+           lex->part_info= new partition_info();
+           if (!lex->part_info)
+           {
+             mem_alloc_error(sizeof(partition_info));
+             YYABORT;
+           }
+           lex->no_write_to_binlog= $3;
+         }
+         reorg_parts_rule
+       ;
+
+reorg_parts_rule:
+         /* empty */
+         {
+           Lex->alter_info.flags|= ALTER_TABLE_REORG;
+         }
+       |
+         alt_part_name_list
+         {
+           Lex->alter_info.flags|= ALTER_REORGANIZE_PARTITION;
+         }
+         INTO '(' part_def_list ')'
+         {
+           LEX *lex= Lex;
+           lex->part_info->no_parts= lex->part_info->partitions.elements;
+         }
+       ;
+
+alt_part_name_list:
+         alt_part_name_item {}
+       | alt_part_name_list ',' alt_part_name_item {}
+       ;
+
+alt_part_name_item:
+         ident
+         {
+           if (Lex->alter_info.partition_names.push_back($1.str))
+           {
+             mem_alloc_error(1);
+             YYABORT;
+           }
+         }
+       ;
+
+/*
+  End of management of partition commands
+*/
+
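For reference, the management commands wired in above cover ALTER TABLE statements such as the following (illustrative examples as C string literals, not taken from the patch):

    const char *partition_cmds[]=
    {
      "ALTER TABLE t1 ADD PARTITION PARTITIONS 2",
      "ALTER TABLE t1 DROP PARTITION p0",
      "ALTER TABLE t1 COALESCE PARTITION 2",
      "ALTER TABLE t1 REBUILD PARTITION ALL",
      "ALTER TABLE t1 REORGANIZE PARTITION p1 INTO"
      " (PARTITION p1a VALUES LESS THAN (100),"
      "  PARTITION p1b VALUES LESS THAN (200))"
    };
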
+alter_list:
+         alter_list_item
+       | alter_list ',' alter_list_item
+       ;

 add_column:
        ADD opt_column
@@ -3748,6 +5367,8 @@ restore:
        RESTORE_SYM table_or_tables
        {
          Lex->sql_command = SQLCOM_RESTORE_TABLE;
+         WARN_DEPRECATED(yythd, "5.2", "RESTORE TABLE",
+                         "MySQL Administrator (mysqldump, mysql)");
        }
        table_list FROM TEXT_STRING_sys
        {
@@ -3758,6 +5379,8 @@ backup:
        BACKUP_SYM table_or_tables
        {
          Lex->sql_command = SQLCOM_BACKUP_TABLE;
+         WARN_DEPRECATED(yythd, "5.2", "BACKUP TABLE",
+                         "MySQL Administrator (mysqldump, mysql)");
        }
        table_list TO_SYM TEXT_STRING_sys
        {
@@ -3817,6 +5440,14 @@ analyze:
        {}
        ;

+binlog_base64_event:
+         BINLOG_SYM TEXT_STRING_sys
+         {
+           Lex->sql_command = SQLCOM_BINLOG_BASE64_EVENT;
+           Lex->comment= $2;
+         }
+       ;
+
 check:
        CHECK_SYM table_or_tables
        {
@@ -3875,6 +5506,13 @@ rename:
        }
        table_to_table_list
        {}
+       | RENAME DATABASE
+         {
+           Lex->db_list.empty();
+           Lex->sql_command= SQLCOM_RENAME_DB;
+         }
+         db_to_db
+         {}
        | RENAME USER clear_privileges rename_list
          {
            Lex->sql_command = SQLCOM_RENAME_USER;
@@ -3910,6 +5548,17 @@ table_to_table:
            YYABORT;
        };

+db_to_db:
+       ident TO_SYM ident
+       {
+         LEX *lex=Lex;
+         if (Lex->db_list.push_back((LEX_STRING*)
+                                    sql_memdup(&$1, sizeof(LEX_STRING))) ||
+             Lex->db_list.push_back((LEX_STRING*)
+                                    sql_memdup(&$3, sizeof(LEX_STRING))))
+           YYABORT;
+       };
+
 keycache:
        CACHE_SYM INDEX_SYM keycache_list IN_SYM key_cache_name
        {
@@ -4090,7 +5739,7 @@ select_options:
        /* empty*/
        | select_option_list
          {
-           if (test_all_bits(Select->options, SELECT_ALL | SELECT_DISTINCT))
+           if (Select->options & SELECT_DISTINCT && Select->options & SELECT_ALL)
            {
              my_error(ER_WRONG_USAGE, MYF(0), "ALL", "DISTINCT");
              YYABORT;
@@ -4997,9 +6646,20 @@ geometry_function:
        ;

 fulltext_options:
-       /* nothing */                   { $$= FT_NL; }
-       | WITH QUERY_SYM EXPANSION_SYM  { $$= FT_NL | FT_EXPAND; }
-       | IN_SYM BOOLEAN_SYM MODE_SYM   { $$= FT_BOOL; }
+         opt_natural_language_mode opt_query_expansion
+         { $$= $1 | $2; }
+       | IN_SYM BOOLEAN_SYM MODE_SYM
+         { $$= FT_BOOL; }
+       ;
+
+opt_natural_language_mode:
+       /* nothing */                          { $$= FT_NL; }
+       | IN_SYM NATURAL LANGUAGE_SYM MODE_SYM { $$= FT_NL; }
+       ;
+
+opt_query_expansion:
+       /* nothing */                   { $$= 0; }
+       | WITH QUERY_SYM EXPANSION_SYM  { $$= FT_EXPAND; }
        ;

 udf_expr_list:
@@ -6056,11 +7716,47 @@ drop:
          lex->sql_command= SQLCOM_DROP_VIEW;
          lex->drop_if_exists= $3;
        }
+       | DROP EVENT_SYM if_exists sp_name
+         {
+           LEX *lex=Lex;
+
+           if (lex->et)
+           {
+             /*
+               Recursive events are not possible because recursive SPs
+               are also not possible. lex->sp_head is not stacked.
+             */
+             my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "EVENT");
+             YYABORT;
+           }
+
+           if (!(lex->et= new Event_timed()))
+             YYABORT;
+
+           if (!lex->et_compile_phase)
+           {
+             lex->et->init_name(YYTHD, $4);
+             lex->et->init_definer(YYTHD);
+           }
+
+           lex->sql_command = SQLCOM_DROP_EVENT;
+           lex->drop_if_exists= $3;
+         }
        | DROP TRIGGER_SYM sp_name
          {
            LEX *lex= Lex;
            lex->sql_command= SQLCOM_DROP_TRIGGER;
            lex->spname= $3;
+         }
+       | DROP TABLESPACE tablespace_name opt_ts_engine opt_ts_wait
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->ts_cmd_type= DROP_TABLESPACE;
+         }
+       | DROP LOGFILE_SYM GROUP logfile_group_name opt_ts_engine opt_ts_wait
+         {
+           LEX *lex= Lex;
+           lex->alter_tablespace_info->ts_cmd_type= DROP_LOGFILE_GROUP;
          }
        ;
@@ -6434,6 +8130,15 @@ show_param:
            if (prepare_schema_table(YYTHD, lex, 0, SCH_TRIGGERS))
              YYABORT;
          }
+       | opt_full EVENTS_SYM opt_db wild_and_where
+         {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SELECT;
+           lex->orig_sql_command= SQLCOM_SHOW_EVENTS;
+           lex->select_lex.db= $3;
+           if (prepare_schema_table(YYTHD, lex, 0, SCH_EVENTS))
+             YYABORT;
+         }
        | TABLE_SYM STATUS_SYM opt_db wild_and_where
          {
            LEX *lex= Lex;
@@ -6452,9 +8157,20 @@ show_param:
            if (prepare_schema_table(YYTHD, lex, 0, SCH_OPEN_TABLES))
              YYABORT;
          }
+       | PLUGIN_SYM
+         {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SELECT;
+           lex->orig_sql_command= SQLCOM_SHOW_PLUGINS;
+           if (prepare_schema_table(YYTHD, lex, 0, SCH_PLUGINS))
+             YYABORT;
+         }
        | ENGINE_SYM storage_engines
          { Lex->create_info.db_type= $2; }
          show_engine_param
+       | ENGINE_SYM ALL
+         { Lex->create_info.db_type= NULL; }
+         show_engine_param
        | opt_full COLUMNS from_or_in table_ident opt_db wild_and_where
          {
            LEX *lex= Lex;
@@ -6507,12 +8223,20 @@ show_param:
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES;
-           WARN_DEPRECATED("SHOW TABLE TYPES", "SHOW [STORAGE] ENGINES");
+           WARN_DEPRECATED(yythd, "5.2", "SHOW TABLE TYPES", "'SHOW [STORAGE] ENGINES'");
          }
        | opt_storage ENGINES_SYM
          {
            LEX *lex=Lex;
            lex->sql_command= SQLCOM_SHOW_STORAGE_ENGINES;
+           lex->orig_sql_command= SQLCOM_SHOW_AUTHORS;
+           if (prepare_schema_table(YYTHD, lex, 0, SCH_ENGINES))
+             YYABORT;
+         }
+       | AUTHORS_SYM
+         {
+           LEX *lex=Lex;
+           lex->sql_command= SQLCOM_SHOW_AUTHORS;
          }
        | PRIVILEGES
          {
@@ -6535,11 +8259,31 @@ show_param:
            lex->option_type= $1;
            if (prepare_schema_table(YYTHD, lex, 0, SCH_STATUS))
              YYABORT;
          }
        | INNOBASE_SYM STATUS_SYM
-         { Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); }
+         {
+           LEX *lex= Lex;
+           lex->sql_command = SQLCOM_SHOW_ENGINE_STATUS;
+           if (!(lex->create_info.db_type=
+                 ha_resolve_by_legacy_type(YYTHD, DB_TYPE_INNODB)))
+           {
+             my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "InnoDB");
+             YYABORT;
+           }
+           WARN_DEPRECATED(yythd, "5.2", "SHOW INNODB STATUS", "'SHOW ENGINE INNODB STATUS'");
+         }
        | MUTEX_SYM STATUS_SYM
-         { Lex->sql_command = SQLCOM_SHOW_MUTEX_STATUS; }
+         {
+           LEX *lex= Lex;
+           lex->sql_command = SQLCOM_SHOW_ENGINE_MUTEX;
+           if (!(lex->create_info.db_type=
+                 ha_resolve_by_legacy_type(YYTHD, DB_TYPE_INNODB)))
+           {
+             my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "InnoDB");
+             YYABORT;
+           }
+           WARN_DEPRECATED(yythd, "5.2", "SHOW MUTEX STATUS", "'SHOW ENGINE INNODB MUTEX'");
+         }
        | opt_full PROCESSLIST_SYM
          { Lex->sql_command= SQLCOM_SHOW_PROCESSLIST;}
        | opt_var_type VARIABLES wild_and_where
@@ -6568,9 +8312,29 @@ show_param:
            YYABORT;
          }
        | BERKELEY_DB_SYM LOGS_SYM
-         { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW BDB LOGS", "SHOW ENGINE BDB LOGS"); }
+         {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS;
+           if (!(lex->create_info.db_type=
+                 ha_resolve_by_legacy_type(YYTHD, DB_TYPE_BERKELEY_DB)))
+           {
+             my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "BerkeleyDB");
+             YYABORT;
+           }
+           WARN_DEPRECATED(yythd, "5.2", "SHOW BDB LOGS", "'SHOW ENGINE BDB LOGS'");
+         }
        | LOGS_SYM
-         { Lex->sql_command= SQLCOM_SHOW_LOGS; WARN_DEPRECATED("SHOW LOGS", "SHOW ENGINE BDB LOGS"); }
+         {
+           LEX *lex= Lex;
+           lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS;
+           if (!(lex->create_info.db_type=
+                 ha_resolve_by_legacy_type(YYTHD, DB_TYPE_BERKELEY_DB)))
+           {
+             my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), "BerkeleyDB");
+             YYABORT;
+           }
+           WARN_DEPRECATED(yythd, "5.2", "SHOW LOGS", "'SHOW ENGINE BDB LOGS'");
+         }
        | GRANTS
          {
            LEX *lex=Lex;
@@ -6686,34 +8450,24 @@ show_param:
            Lex->spname= $3;
 #endif
          }
-       ;
+       | CREATE EVENT_SYM sp_name
+         {
+           Lex->sql_command = SQLCOM_SHOW_CREATE_EVENT;
+           Lex->spname= $3;
+           Lex->et= new Event_timed();
+           if (!Lex->et)
+             YYABORT;
+           Lex->et->init_definer(YYTHD);
+         }
+       ;

 show_engine_param:
        STATUS_SYM
-         {
-           switch (Lex->create_info.db_type) {
-           case DB_TYPE_NDBCLUSTER:
-             Lex->sql_command = SQLCOM_SHOW_NDBCLUSTER_STATUS;
-             break;
-           case DB_TYPE_INNODB:
-             Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS;
-             break;
-           default:
-             my_error(ER_NOT_SUPPORTED_YET, MYF(0), "STATUS");
-             YYABORT;
-           }
-         }
+         { Lex->sql_command= SQLCOM_SHOW_ENGINE_STATUS; }
+       | MUTEX_SYM
+         { Lex->sql_command= SQLCOM_SHOW_ENGINE_MUTEX; }
        | LOGS_SYM
-         {
-           switch (Lex->create_info.db_type) {
-           case DB_TYPE_BERKELEY_DB:
-             Lex->sql_command = SQLCOM_SHOW_LOGS;
-             break;
-           default:
-             my_error(ER_NOT_SUPPORTED_YET, MYF(0), "LOGS");
-             YYABORT;
-           }
-         };
+         { Lex->sql_command= SQLCOM_SHOW_ENGINE_LOGS; };

 master_or_binary:
        MASTER_SYM
@@ -6789,8 +8543,10 @@ describe_command:
 opt_extended_describe:
        /* empty */ {}
        | EXTENDED_SYM   { Lex->describe|= DESCRIBE_EXTENDED; }
+       | PARTITIONS_SYM { Lex->describe|= DESCRIBE_PARTITIONS; }
        ;

+
 opt_describe_column:
        /* empty */ {}
        | text_string { Lex->wild= $1; }
@@ -6931,7 +8687,9 @@ load:   LOAD DATA_SYM
        LOAD TABLE_SYM table_ident FROM MASTER_SYM
        {
          LEX *lex=Lex;
-         if (lex->sphead)
+         WARN_DEPRECATED(yythd, "5.2", "LOAD TABLE FROM MASTER",
+                         "MySQL Administrator (mysqldump, mysql)");
+         if (lex->sphead)
          {
            my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD TABLE");
            YYABORT;
@@ -7485,6 +9243,12 @@ TEXT_STRING_filesystem:
 ident:
        IDENT_sys    { $$=$1; }
+       | READ_ONLY_SYM
+         {
+           THD *thd= YYTHD;
+           $$.str= thd->strmake("read_only",9);
+           $$.length= 9;
+         }
        | keyword
          {
            THD *thd= YYTHD;
@@ -7549,6 +9313,7 @@ user:
 keyword:
        keyword_sp              {}
        | ASCII_SYM             {}
+       | AUTHORS_SYM           {}
        | BACKUP_SYM            {}
        | BEGIN_SYM             {}
        | BYTE_SYM              {}
@@ -7566,9 +9331,13 @@ keyword:
        | FLUSH_SYM             {}
        | HANDLER_SYM           {}
        | HELP_SYM              {}
+       | INSTALL_SYM           {}
        | LANGUAGE_SYM          {}
        | NO_SYM                {}
        | OPEN_SYM              {}
+       | PARSER_SYM            {}
+       | PARTITION_SYM         {}
+       | PLUGIN_SYM            {}
        | PREPARE_SYM           {}
        | REPAIR                {}
        | RESET_SYM             {}
@@ -7578,10 +9347,12 @@ keyword:
        | SECURITY_SYM          {}
        | SIGNED_SYM            {}
        | SLAVE                 {}
+       | SONAME_SYM            {}
        | START_SYM             {}
        | STOP_SYM              {}
        | TRUNCATE_SYM          {}
        | UNICODE_SYM           {}
+       | UNINSTALL_SYM         {}
        | XA_SYM                {}
        ;

@@ -7599,7 +9370,9 @@ keyword_sp:
        | AGGREGATE_SYM         {}
        | ALGORITHM_SYM         {}
        | ANY_SYM               {}
+       | AT_SYM                {}
        | AUTO_INC              {}
+       | AUTOEXTEND_SIZE_SYM   {}
        | AVG_ROW_LENGTH        {}
        | AVG_SYM               {}
        | BERKELEY_DB_SYM       {}
@@ -7613,16 +9386,19 @@ keyword_sp:
        | CHANGED               {}
        | CIPHER_SYM            {}
        | CLIENT_SYM            {}
-       | CODE_SYM              {}
+       | COALESCE              {}
+       | CODE_SYM              {}
        | COLLATION_SYM         {}
        | COLUMNS               {}
        | COMMITTED_SYM         {}
        | COMPACT_SYM           {}
+       | COMPLETION_SYM        {}
        | COMPRESSED_SYM        {}
        | CONCURRENT            {}
        | CONSISTENT_SYM        {}
        | CUBE_SYM              {}
        | DATA_SYM              {}
+       | DATAFILE_SYM          {}
        | DATETIME              {}
        | DATE_SYM              {}
        | DAY_SYM               {}
@@ -7630,21 +9406,26 @@ keyword_sp:
        | DELAY_KEY_WRITE_SYM   {}
        | DES_KEY_FILE          {}
        | DIRECTORY_SYM         {}
+       | DISABLE_SYM           {}
        | DISCARD               {}
+       | DISK_SYM              {}
        | DUMPFILE              {}
        | DUPLICATE_SYM         {}
        | DYNAMIC_SYM           {}
+       | ENDS_SYM              {}
        | ENUM                  {}
        | ENGINE_SYM            {}
        | ENGINES_SYM           {}
        | ERRORS                {}
        | ESCAPE_SYM            {}
+       | EVENT_SYM             {}
        | EVENTS_SYM            {}
-       | EXPANSION_SYM         {}
+       | EVERY_SYM             {}
+       | EXPANSION_SYM         {}
        | EXTENDED_SYM          {}
+       | EXTENT_SIZE_SYM       {}
        | FAST_SYM              {}
        | FOUND_SYM             {}
-       | DISABLE_SYM           {}
        | ENABLE_SYM            {}
        | FULL                  {}
        | FILE_SYM              {}
@@ -7663,6 +9444,7 @@ keyword_sp:
        | INVOKER_SYM           {}
        | IMPORT                {}
        | INDEXES               {}
+       | INITIAL_SIZE_SYM      {}
        | ISOLATION             {}
        | ISSUER_SYM            {}
        | INNOBASE_SYM          {}
@@ -7670,10 +9452,13 @@ keyword_sp:
        | RELAY_THREAD          {}
        | LAST_SYM              {}
        | LEAVES                {}
+       | LESS_SYM              {}
        | LEVEL_SYM             {}
        | LINESTRING            {}
+       | LIST_SYM              {}
        | LOCAL_SYM             {}
        | LOCKS_SYM             {}
+       | LOGFILE_SYM           {}
        | LOGS_SYM              {}
        | MAX_ROWS              {}
        | MASTER_SYM            {}
@@ -7693,9 +9478,12 @@ keyword_sp:
        | MASTER_SSL_KEY_SYM    {}
        | MAX_CONNECTIONS_PER_HOUR {}
        | MAX_QUERIES_PER_HOUR  {}
+       | MAX_SIZE_SYM          {}
        | MAX_UPDATES_PER_HOUR  {}
        | MAX_USER_CONNECTIONS_SYM {}
+       | MAX_VALUE_SYM         {}
        | MEDIUM_SYM            {}
+       | MEMORY_SYM            {}
        | MERGE_SYM             {}
        | MICROSECOND_SYM       {}
        | MIGRATE_SYM           {}
@@ -7715,6 +9503,8 @@ keyword_sp:
        | NDBCLUSTER_SYM        {}
        | NEXT_SYM              {}
        | NEW_SYM               {}
+       | NO_WAIT_SYM           {}
+       | NODEGROUP_SYM         {}
        | NONE_SYM              {}
        | NVARCHAR_SYM          {}
        | OFFSET_SYM            {}
@@ -7723,10 +9513,12 @@ keyword_sp:
        | ONE_SYM               {}
        | PACK_KEYS_SYM         {}
        | PARTIAL               {}
+       | PARTITIONS_SYM        {}
        | PASSWORD              {}
        | PHASE_SYM             {}
        | POINT_SYM             {}
        | POLYGON               {}
+       | PRESERVE_SYM          {}
        | PREV_SYM              {}
        | PRIVILEGES            {}
        | PROCESS               {}
@@ -7734,16 +9526,15 @@ keyword_sp:
        | QUARTER_SYM           {}
        | QUERY_SYM             {}
        | QUICK                 {}
-       | RAID_0_SYM            {}
-       | RAID_CHUNKS           {}
-       | RAID_CHUNKSIZE        {}
-       | RAID_STRIPED_SYM      {}
-       | RAID_TYPE             {}
+       | REBUILD_SYM           {}
        | RECOVER_SYM           {}
+       | REDO_BUFFER_SIZE_SYM  {}
+       | REDOFILE_SYM          {}
        | REDUNDANT_SYM         {}
        | RELAY_LOG_FILE_SYM    {}
        | RELAY_LOG_POS_SYM     {}
        | RELOAD                {}
+       | REORGANIZE_SYM        {}
        | REPEATABLE_SYM        {}
        | REPLICATION           {}
        | RESOURCES             {}
@@ -7755,6 +9546,7 @@ keyword_sp:
        | ROW_FORMAT_SYM        {}
        | ROW_SYM               {}
        | RTREE_SYM             {}
+       | SCHEDULE_SYM          {}
        | SECOND_SYM            {}
        | SERIAL_SYM            {}
        | SERIALIZABLE_SYM      {}
@@ -7768,11 +9560,14 @@ keyword_sp:
        | SQL_BUFFER_RESULT     {}
        | SQL_NO_CACHE_SYM      {}
        | SQL_THREAD            {}
+       | STARTS_SYM            {}
        | STATUS_SYM            {}
        | STORAGE_SYM           {}
        | STRING_SYM            {}
        | SUBDATE_SYM           {}
        | SUBJECT_SYM           {}
+       | SUBPARTITION_SYM      {}
+       | SUBPARTITIONS_SYM     {}
        | SUPER_SYM             {}
        | SUSPEND_SYM           {}
        | TABLES                {}
@@ -7780,6 +9575,7 @@ keyword_sp:
        | TEMPORARY             {}
        | TEMPTABLE_SYM         {}
        | TEXT_SYM              {}
+       | THAN_SYM              {}
        | TRANSACTION_SYM       {}
        | TRIGGERS_SYM          {}
        | TIMESTAMP             {}
@@ -7792,6 +9588,8 @@ keyword_sp:
        | FUNCTION_SYM          {}
        | UNCOMMITTED_SYM       {}
        | UNDEFINED_SYM         {}
+       | UNDO_BUFFER_SIZE_SYM  {}
+       | UNDOFILE_SYM          {}
        | UNKNOWN_SYM           {}
        | UNTIL_SYM             {}
        | USER                  {}
@@ -7800,6 +9598,7 @@ keyword_sp:
        | VIEW_SYM              {}
        | VALUE_SYM             {}
        | WARNINGS              {}
+       | WAIT_SYM              {}
        | WEEK_SYM              {}
        | WORK_SYM              {}
        | X509_SYM              {}
@@ -7932,7 +9731,7 @@ sys_option_value:
        {
          LEX *lex=Lex;

-         if ($2.var == &trg_new_row_fake_var)
+         if ($2.var == trg_new_row_fake_var)
          {
            /* We are in trigger and assigning value to field of new row */
            Item *it;
@@ -8009,8 +9808,7 @@ sys_option_value:
        | option_type TRANSACTION_SYM ISOLATION LEVEL_SYM isolation_types
          {
            LEX *lex=Lex;
-           if ($1)
-             lex->option_type= $1;
+           lex->option_type= $1;
            lex->var_list.push_back(new set_var(lex->option_type,
                                    find_sys_var("tx_isolation"),
                                    &null_lex_str,
@@ -8158,7 +9956,7 @@ internal_variable_name:
              YYABORT;
            }
            /* This special combination will denote field of NEW row */
-           $$.var= &trg_new_row_fake_var;
+           $$.var= trg_new_row_fake_var;
            $$.base_name= $3;
          }
          else
@@ -8493,6 +10291,8 @@ object_privilege:
        | CREATE ROUTINE_SYM { Lex->grant |= CREATE_PROC_ACL; }
        | ALTER ROUTINE_SYM  { Lex->grant |= ALTER_PROC_ACL; }
        | CREATE USER        { Lex->grant |= CREATE_USER_ACL; }
+       | EVENT_SYM          { Lex->grant |= EVENT_ACL;}
+       | TRIGGER_SYM        { Lex->grant |= TRIGGER_ACL; }
        ;

@@ -9131,18 +10931,18 @@ trigger_tail:
        {
          LEX *lex= Lex;
          sp_head *sp;
-
+
          if (lex->sphead)
          {
            my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "TRIGGER");
            YYABORT;
          }
-
+
          if (!(sp= new sp_head()))
            YYABORT;
          sp->reset_thd_mem_root(YYTHD);
          sp->init(lex);
-
+
          lex->trigger_definition_begin= $2;
          lex->ident.str= $7;
          lex->ident.length= $9 - $7;
@@ -9155,9 +10955,9 @@ trigger_tail:
            stored procedure, otherwise yylex will chop it into pieces
            at each ';'.
          */
-         sp->m_old_cmq= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES;
+         $<ulong_num>$= YYTHD->client_capabilities & CLIENT_MULTI_QUERIES;
          YYTHD->client_capabilities &= ~CLIENT_MULTI_QUERIES;
-
+
          bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics));
          lex->sphead->m_chistics= &lex->sp_chistics;
          lex->sphead->m_body_begin= lex->ptr;
@@ -9168,17 +10968,17 @@ trigger_tail:
        {
          LEX *lex= Lex;
          sp_head *sp= lex->sphead;
-
+
          lex->sql_command= SQLCOM_CREATE_TRIGGER;
          sp->init_strings(YYTHD, lex, $3);
          /* Restore flag if it was cleared above */
-         if (sp->m_old_cmq)
-           YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES;
+
+         YYTHD->client_capabilities |= $<ulong_num>13;
          sp->restore_thd_mem_root(YYTHD);
-
+
          if (sp->is_not_allowed_in_function("trigger"))
            YYABORT;
-
+
          /*
            We have to do it after parsing trigger body, because some of
            sp_proc_stmt alternatives are not saving/restoring LEX, so
@@ -9269,4 +11069,19 @@ opt_migrate:
        | FOR_SYM MIGRATE_SYM { Lex->xa_opt=XA_FOR_MIGRATE; }
        ;

+install:
+       INSTALL_SYM PLUGIN_SYM ident SONAME_SYM TEXT_STRING_sys
+       {
+         LEX *lex= Lex;
+         lex->sql_command= SQLCOM_INSTALL_PLUGIN;
+         lex->comment= $3;
+         lex->ident= $5;
+       };
+
+uninstall:
+       UNINSTALL_SYM PLUGIN_SYM ident
+       {
+         LEX *lex= Lex;
+         lex->sql_command= SQLCOM_UNINSTALL_PLUGIN;
+         lex->comment= $3;
+       };
diff --git a/sql/strfunc.cc b/sql/strfunc.cc
index c822d10af46..2525703172f 100644
--- a/sql/strfunc.cc
+++ b/sql/strfunc.cc
@@ -235,3 +235,80 @@ uint check_word(TYPELIB *lib, const char *val, const char *end,
    *end_of_word= ptr;
  return res;
 }
+
+
+/*
+  Converts a string between character sets
+
+  SYNOPSIS
+    strconvert()
+    from_cs       source character set
+    from          source, a null terminated string
+    to            destination buffer
+    to_length     destination buffer length
+
+  NOTES
+    'to' is always terminated with a '\0' character.
+    If there is not enough space to convert the whole string,
+    only a prefix is converted, and terminated with '\0'.
+
+  RETURN VALUES
+    result string length
+*/
+
+
+uint strconvert(CHARSET_INFO *from_cs, const char *from,
+                CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
+{
+  int cnvres;
+  my_wc_t wc;
+  char *to_start= to;
+  uchar *to_end= (uchar*) to + to_length - 1;
+  int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *,
+               const uchar *)= from_cs->cset->mb_wc;
+  int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)=
+    to_cs->cset->wc_mb;
+  uint error_count= 0;
+
+  while (1)
+  {
+    /*
+      Using 'from + 10' is safe:
+      - it is enough to scan a single character in any character set.
+      - if the remaining string is shorter than 10, then mb_wc will return
+        an error because of the unexpected '\0' character.
+    */
+    if ((cnvres= (*mb_wc)(from_cs, &wc,
+                          (uchar*) from, (uchar*) from + 10)) > 0)
+    {
+      if (!wc)
+        break;
+      from+= cnvres;
+    }
+    else if (cnvres == MY_CS_ILSEQ)
+    {
+      error_count++;
+      from++;
+      wc= '?';
+    }
+    else
+      break;                                    // Impossible char.
+
+outp:
+
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+      to+= cnvres;
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      error_count++;
+      wc= '?';
+      goto outp;
+    }
+    else
+      break;
+  }
+  *to= '\0';
+  *errors= error_count;
+  return (uint32) (to - to_start);
+}
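
A minimal usage sketch for the new function (the character set globals are the usual server ones; the buffer size and input are illustrative):

    char buf[64];
    uint errors;
    /* Convert a latin1 string into the system character set. */
    uint len= strconvert(&my_charset_latin1, "M\xFCnchen",
                         system_charset_info, buf, sizeof(buf), &errors);
    /* buf is now a NUL-terminated copy; errors counts '?' substitutions. */
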
Zero value of a member means diff --git a/sql/table.cc b/sql/table.cc index 1a2c2b8f073..d6a715ae1b4 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -24,134 +24,420 @@ /* Functions defined in this file */ -static void frm_error(int error,TABLE *form,const char *name, - int errortype, int errarg); +void open_table_error(TABLE_SHARE *share, int error, int db_errno, + myf errortype, int errarg); +static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, + File file); static void fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types, char **names); -static uint find_field(TABLE *form,uint start,uint length); +static uint find_field(Field **fields, uint start, uint length); -static byte* get_field_name(Field **buff,uint *length, +/* Get column name from column hash */ + +static byte *get_field_name(Field **buff, uint *length, my_bool not_used __attribute__((unused))) { *length= (uint) strlen((*buff)->field_name); return (byte*) (*buff)->field_name; } + + +/* + Returns pointer to '.frm' extension of the file name. + + SYNOPSIS + fn_rext() + name file name + + DESCRIPTION + Checks the file name part starting with the rightmost '.' character, + and returns it if it is equal to '.frm'. + + TODO + It is a good idea to get rid of this function by modifying the code + to guarantee that the functions presently calling fn_rext() always + get arguments in the same format: either with '.frm' or without '.frm'. + + RETURN VALUES + Pointer to the '.frm' extension. If there is no extension, + or the extension is not '.frm', a pointer to the end of the file name. +*/ + +char *fn_rext(char *name) +{ + char *res= strrchr(name, '.'); + if (res && !strcmp(res, ".frm")) + return res; + return name + strlen(name); } + + /* - Open a .frm file + Allocate and set up a TABLE_SHARE structure + + SYNOPSIS + alloc_table_share() + TABLE_LIST Take database and table name from there + key Table cache key (db \0 table_name \0...)
+ key_length Length of key + + RETURN + 0 Error (out of memory) + # Share +*/ + +TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, + uint key_length) +{ + MEM_ROOT mem_root; + TABLE_SHARE *share; + char path[FN_REFLEN], normalized_path[FN_REFLEN]; + uint path_length, normalized_length; + + path_length= build_table_filename(path, sizeof(path) - 1, + table_list->db, + table_list->table_name, ""); + normalized_length= build_table_filename(normalized_path, + sizeof(normalized_path) - 1, + table_list->db, + table_list->table_name, ""); + + init_sql_alloc(&mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + if ((share= (TABLE_SHARE*) alloc_root(&mem_root, + sizeof(*share) + key_length + + path_length + normalized_length +2))) + { + bzero((char*) share, sizeof(*share)); + share->table_cache_key.str= (char*) (share+1); + share->table_cache_key.length= key_length; + memcpy(share->table_cache_key.str, key, key_length); + + /* Use the fact that the key is db\0table_name\0 */ + share->db.str= share->table_cache_key.str; + share->db.length= strlen(share->db.str); + share->table_name.str= share->db.str + share->db.length + 1; + share->table_name.length= strlen(share->table_name.str); + + share->path.str= share->table_cache_key.str+ key_length; + share->path.length= path_length; + strmov(share->path.str, path); + share->normalized_path.str= share->path.str+ path_length+1; + share->normalized_path.length= normalized_length; + strmov(share->normalized_path.str, normalized_path); + + share->version= refresh_version; + share->flush_version= flush_version; + +#ifdef HAVE_ROW_BASED_REPLICATION + /* + This constant is used to mark that no table map version has been + assigned. No arithmetic is done on the value: it will be + overwritten with a value taken from MYSQL_BIN_LOG. + */ + share->table_map_version= ~(ulonglong)0; + + /* + Since alloc_table_share() can be called without any locking (for + example, ha_create_table... functions), we do not assign a table + map id here. Instead we assign a value that is not used + elsewhere, and then assign a table map id inside open_table() + under the protection of the LOCK_open mutex. + */ + share->table_map_id= ULONG_MAX; +#endif + + memcpy((char*) &share->mem_root, (char*) &mem_root, sizeof(mem_root)); + pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&share->cond, NULL); + } + return share; +} + + +/* + Initialize share for temporary tables + + SYNOPSIS + init_tmp_table_share() + share Share to fill + key Table_cache_key, as generated from create_table_def_key. + must start with db name. + key_length Length of key + table_name Table name + path Path to file (possibly in lower case) without .frm + + NOTES + This is different from alloc_table_share() because temporary tables + don't have to be shared between threads or put into the table def + cache, so we can do some things notably simpler and faster. + + If the table is not put in thd->temporary_tables (happens only when + one uses OPEN TEMPORARY) then one can specify 'db' as key and + use key_length= 0, as neither table_cache_key nor key_length will be used.
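      A minimal sketch of how such a cache key is laid out -- database
      name, '\0', table name, '\0' -- in the spirit of the
      create_table_def_key() generation mentioned above (an illustration;
      real keys may append further data after the second '\0'):

        char key[NAME_LEN * 2 + 2];
        uint key_length= (uint) (strmov(strmov(key, db) + 1,
                                        table_name) - key) + 1;
        /* key holds "db\0table_name\0"; key_length counts both '\0's */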
+*/ + +void init_tmp_table_share(TABLE_SHARE *share, const char *key, + uint key_length, const char *table_name, + const char *path) +{ + DBUG_ENTER("init_tmp_table_share"); + + bzero((char*) share, sizeof(*share)); + init_sql_alloc(&share->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + share->tmp_table= INTERNAL_TMP_TABLE; + share->db.str= (char*) key; + share->db.length= strlen(key); + share->table_cache_key.str= (char*) key; + share->table_cache_key.length= key_length; + share->table_name.str= (char*) table_name; + share->table_name.length= strlen(table_name); + share->path.str= (char*) path; + share->normalized_path.str= (char*) path; + share->path.length= share->normalized_path.length= strlen(path); + share->frm_version= FRM_VER_TRUE_VARCHAR; + +#ifdef HAVE_ROW_BASED_REPLICATION + /* + Temporary tables are not replicated, but we set up these fields + anyway to be able to catch errors. + */ + share->table_map_version= ~(ulonglong)0; + share->table_map_id= ULONG_MAX; +#endif + + DBUG_VOID_RETURN; +} + + +/* + Free table share and memory used by it + + SYNOPSIS + free_table_share() + share Table share + + NOTES + share->mutex must be locked when we come here if it's not a temp table +*/ + +void free_table_share(TABLE_SHARE *share) +{ + MEM_ROOT mem_root; + DBUG_ENTER("free_table_share"); + DBUG_PRINT("enter", ("table: %s.%s", share->db.str, share->table_name.str)); + DBUG_ASSERT(share->ref_count == 0); + + /* + If someone is waiting for this to be deleted, inform it about this. + Don't do a delete until we know that no one is referring to this anymore. + */ + if (share->tmp_table == NO_TMP_TABLE) + { + /* share->mutex is locked in release_table_share() */ + while (share->waiting_on_cond) + { + pthread_cond_broadcast(&share->cond); + pthread_cond_wait(&share->cond, &share->mutex); + } + /* No thread refers to this anymore */ + pthread_mutex_unlock(&share->mutex); + pthread_mutex_destroy(&share->mutex); + pthread_cond_destroy(&share->cond); + } + hash_free(&share->name_hash); + + /* We must copy mem_root from share because share is allocated through it */ + memcpy((char*) &mem_root, (char*) &share->mem_root, sizeof(mem_root)); + free_root(&mem_root, MYF(0)); // Frees share + DBUG_VOID_RETURN; +} + +/* + Read table definition from a binary / text-based .frm file + SYNOPSIS - openfrm() + open_table_def() + thd Thread handler + share Fill this with table definition + db_flags Bit mask of the following flags: OPEN_VIEW - name path to table-file "db/name" - alias alias for table - db_stat open flags (for example HA_OPEN_KEYFILE|HA_OPEN_RNDFILE..) - can be 0 (example in ha_example_table) - prgflag READ_ALL etc.. - ha_open_flags HA_OPEN_ABORT_IF_LOCKED etc.. - outparam result table + NOTES + This function is called when the table definition is not cached in + table_def_cache. + The data is returned in 'share', which is allocated by + alloc_table_share(). The code assumes that share is initialized.
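      A simplified sketch of how the pieces introduced here fit together
      (illustrative only; the real callers in sql_base.cc also consult the
      table definition cache and hold the appropriate locks):

        TABLE_SHARE *share;
        TABLE tab;
        if (!(share= alloc_table_share(table_list, key, key_length)))
          return 1;                            /* out of memory */
        if (open_table_def(thd, share, 0))     /* read and parse the .frm */
        {
          free_table_share(share);
          return 1;
        }
        error= open_table_from_share(thd, share, alias, db_stat, prgflag,
                                     ha_open_flags, &tab, FALSE);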
RETURN VALUES 0 ok - 1 Error (see frm_error) - 2 Error (see frm_error) + 1 Error (see open_table_error) + 2 Error (see open_table_error) 3 Wrong data in .frm file - 4 Error (see frm_error) - 5 Error (see frm_error: charset unavailable) + 4 Error (see open_table_error) + 5 Error (see open_table_error: charset unavailable) 6 Unknown .frm version */ -int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, - uint prgflag, uint ha_open_flags, TABLE *outparam) -{ - reg1 uint i; - reg2 uchar *strpos; - int j,error, errarg= 0; - uint rec_buff_length,n_length,int_length,records,key_parts,keys, - interval_count,interval_parts,read_length,db_create_options; - uint key_info_length, com_length; - ulong pos, record_offset; - char index_file[FN_REFLEN], *names, *keynames, *comment_pos; - uchar head[288],*disk_buff,new_field_pack_flag; - my_string record; - const char **int_array; - bool use_hash, null_field_first; - bool error_reported= FALSE; - File file; - Field **field_ptr,*reg_field; - KEY *keyinfo; - KEY_PART_INFO *key_part; - uchar *null_pos; - uint null_bit_pos, new_frm_ver, field_pack_length; - SQL_CRYPT *crypted=0; +int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags) +{ + int error, table_type; + bool error_given; + File file; + uchar head[288], *disk_buff; + char path[FN_REFLEN]; MEM_ROOT **root_ptr, *old_root; - TABLE_SHARE *share; - DBUG_ENTER("openfrm"); - DBUG_PRINT("enter",("name: '%s' form: 0x%lx",name,outparam)); + DBUG_ENTER("open_table_def"); + DBUG_PRINT("enter", ("name: '%s.%s'",share->db.str, share->table_name.str)); error= 1; + error_given= 0; disk_buff= NULL; - root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); - old_root= *root_ptr; - bzero((char*) outparam,sizeof(*outparam)); - outparam->in_use= thd; - outparam->s= share= &outparam->share_not_to_be_used; + strxmov(path, share->normalized_path.str, reg_ext, NullS); + if ((file= my_open(path, O_RDONLY | O_SHARE, MYF(0))) < 0) + { + /* Try unencoded 5.0 name */ + uint length; + strxnmov(path, sizeof(path)-1, + mysql_data_home, "/", share->db.str, "/", + share->table_name.str, reg_ext, NullS); + length= unpack_filename(path, path) - reg_ext_length; + /* + The following is a safety test and should never fail + as the old file name should never be longer than the new one. + */ + DBUG_ASSERT(length <= share->normalized_path.length); + /* + If the old and the new names have the same length, + then the table name does not have tricky characters, + so no need to check the old file name.
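      For example (illustrative only; the exact @00xx encoding bytes are an
      assumption of this note):

        5.0 on-disk name:  ./test/t-1.frm
        5.1 expected name: ./test/t@002d1.frm

      The two paths differ in length, which is what the check below
      exploits: only when the lengths are equal can the old-name probe
      be skipped.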
+ */ + if (length == share->normalized_path.length || + ((file= my_open(path, O_RDONLY | O_SHARE, MYF(0))) < 0)) + goto err_not_open; - if ((file=my_open(fn_format(index_file, name, "", reg_ext, - MY_UNPACK_FILENAME), - O_RDONLY | O_SHARE, - MYF(0))) - < 0) - goto err; + /* Unencoded 5.0 table name found */ + path[length]= '\0'; // Remove .frm extension + strmov(share->normalized_path.str, path); + share->normalized_path.length= length; + } error= 4; - if (my_read(file,(byte*) head,64,MYF(MY_NABP))) + if (my_read(file,(byte*) head, 64, MYF(MY_NABP))) goto err; - if (memcmp(head, STRING_WITH_LEN("TYPE=")) == 0) + if (head[0] == (uchar) 254 && head[1] == 1) { - // new .frm - my_close(file,MYF(MY_WME)); - - if (db_stat & NO_ERR_ON_NEW_FRM) - DBUG_RETURN(5); - file= -1; - // caller can't process new .frm + if (head[2] == FRM_VER || head[2] == FRM_VER+1 || + (head[2] >= FRM_VER+3 && head[2] <= FRM_VER+4)) + table_type= 1; + else + { + error= 6; // Unknown .frm version + goto err; + } + } + else if (memcmp(head, STRING_WITH_LEN("TYPE=")) == 0) + { + error= 5; + if (memcmp(head+5,"VIEW",4) == 0) + { + share->is_view= 1; + if (db_flags & OPEN_VIEW) + error= 0; + } goto err; } + else + goto err; + + /* No handling of text based files yet */ + if (table_type == 1) + { + root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + old_root= *root_ptr; + *root_ptr= &share->mem_root; + error= open_binary_frm(thd, share, head, file); + *root_ptr= old_root; - share->blob_ptr_size= sizeof(char*); - outparam->db_stat= db_stat; - init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); - *root_ptr= &outparam->mem_root; + if (share->db.length == 5 && + !my_strcasecmp(system_charset_info, share->db.str, "mysql")) + { + /* + We can't mark all tables in the 'mysql' database as system since we + don't allow locking such tables for writing with any other tables + (even with other system tables) and some privilege tables need this. + */ + if (!my_strcasecmp(system_charset_info, share->table_name.str, "proc")) + share->system_table= 1; + else + { + if (!my_strcasecmp(system_charset_info, share->table_name.str, + "general_log")) + share->log_table= QUERY_LOG_GENERAL; + else + if (!my_strcasecmp(system_charset_info, share->table_name.str, + "slow_log")) + share->log_table= QUERY_LOG_SLOW; + } + } + error_given= 1; + } - share->table_name= strdup_root(&outparam->mem_root, - name+dirname_length(name)); - share->path= strdup_root(&outparam->mem_root, name); - outparam->alias= my_strdup(alias, MYF(MY_WME)); - if (!share->table_name || !share->path || !outparam->alias) - goto err; - *fn_ext(share->table_name)='\0'; // Remove extension - *fn_ext(share->path)='\0'; // Remove extension + if (!error) + thd->status_var.opened_shares++; - if (head[0] != (uchar) 254 || head[1] != 1) - goto err; /* purecov: inspected */ - if (head[2] != FRM_VER && head[2] != FRM_VER+1 && - !
(head[2] >= FRM_VER+3 && head[2] <= FRM_VER+4)) +err: + my_close(file, MYF(MY_WME)); + +err_not_open: + if (error && !error_given) { - error= 6; - goto err; /* purecov: inspected */ + share->error= error; + open_table_error(share, error, (share->open_errno= my_errno), 0); } - new_field_pack_flag=head[27]; + + DBUG_RETURN(error); +} + + +/* + Read data from a binary .frm file from MySQL 3.23 - 5.0 into TABLE_SHARE +*/ + +static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, + File file) +{ + int error, errarg= 0; + uint new_frm_ver, field_pack_length, new_field_pack_flag; + uint interval_count, interval_parts, read_length, int_length; + uint db_create_options, keys, key_parts, n_length; + uint key_info_length, com_length, null_bit_pos; + uint extra_rec_buf_length; + uint i,j; + bool use_hash; + char *keynames, *record, *names, *comment_pos; + uchar *disk_buff, *strpos, *null_flags, *null_pos; + ulong pos, record_offset, *rec_per_key, rec_buff_length; + handler *handler_file= 0; + KEY *keyinfo; + KEY_PART_INFO *key_part; + SQL_CRYPT *crypted=0; + Field **field_ptr, *reg_field; + const char **interval_array; + enum legacy_db_type legacy_db_type; + DBUG_ENTER("open_binary_frm"); + + new_field_pack_flag= head[27]; new_frm_ver= (head[2] - FRM_VER); field_pack_length= new_frm_ver < 2 ? 11 : 17; + disk_buff= 0; - error=3; + error= 3; if (!(pos=get_form_pos(file,head,(TYPELIB*) 0))) goto err; /* purecov: inspected */ - *fn_ext(index_file)='\0'; // Remove .frm extension share->frm_version= head[2]; /* @@ -163,20 +449,23 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (share->frm_version == FRM_VER_TRUE_VARCHAR -1 && head[33] == 5) share->frm_version= FRM_VER_TRUE_VARCHAR; - share->db_type= ha_checktype(thd,(enum db_type) (uint) *(head+3),0,0); - share->db_create_options= db_create_options=uint2korr(head+30); +#ifdef WITH_PARTITION_STORAGE_ENGINE + share->default_part_db_type= + ha_checktype(thd, (enum legacy_db_type) (uint) *(head+61), 0, 0); + DBUG_PRINT("info", ("default_part_db_type = %u", head[61])); +#endif + legacy_db_type= (enum legacy_db_type) (uint) *(head+3); + share->db_type= ha_checktype(thd, legacy_db_type, 0, 0); + share->db_create_options= db_create_options= uint2korr(head+30); share->db_options_in_use= share->db_create_options; share->mysql_version= uint4korr(head+51); - null_field_first= 0; + share->null_field_first= 0; if (!head[32]) // New frm file in 3.23 { share->avg_row_length= uint4korr(head+34); share-> row_type= (row_type) head[40]; - share->raid_type= head[41]; - share->raid_chunks= head[42]; - share->raid_chunksize= uint4korr(head+43); share->table_charset= get_charset((uint) head[38],MYF(0)); - null_field_first= 1; + share->null_field_first= 1; } if (!share->table_charset) { @@ -187,7 +476,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, sql_print_warning("'%s' had no or invalid character set, " "and default character set is multi-byte, " "so character column sizes may have changed", - name); + share->path); } share->table_charset= default_charset_info; } @@ -195,7 +484,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, if (db_create_options & HA_OPTION_LONG_BLOB_PTR) share->blob_ptr_size= portable_sizeof_char_ptr; /* Set temporarily a good value for db_low_byte_first */ - share->db_low_byte_first= test(share->db_type != DB_TYPE_ISAM); + share->db_low_byte_first= test(legacy_db_type != DB_TYPE_ISAM); error=4; share->max_rows= uint4korr(head+18); share->min_rows= 
uint4korr(head+22); @@ -217,27 +506,23 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } share->keys_for_keyread.init(0); share->keys_in_use.init(keys); - outparam->quick_keys.init(); - outparam->used_keys.init(); - outparam->keys_in_use_for_query.init(); n_length=keys*sizeof(KEY)+key_parts*sizeof(KEY_PART_INFO); - if (!(keyinfo = (KEY*) alloc_root(&outparam->mem_root, - n_length+uint2korr(disk_buff+4)))) + if (!(keyinfo = (KEY*) alloc_root(&share->mem_root, + n_length + uint2korr(disk_buff+4)))) goto err; /* purecov: inspected */ bzero((char*) keyinfo,n_length); - outparam->key_info=keyinfo; + share->key_info= keyinfo; key_part= my_reinterpret_cast(KEY_PART_INFO*) (keyinfo+keys); strpos=disk_buff+6; - ulong *rec_per_key; - if (!(rec_per_key= (ulong*) alloc_root(&outparam->mem_root, + if (!(rec_per_key= (ulong*) alloc_root(&share->mem_root, sizeof(ulong*)*key_parts))) goto err; for (i=0 ; i < keys ; i++, keyinfo++) { - keyinfo->table= outparam; + keyinfo->table= 0; // Updated in open_frm if (new_frm_ver >= 3) { keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME; @@ -293,10 +578,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, #ifdef HAVE_CRYPTED_FRM else if (*(head+26) == 2) { - *root_ptr= old_root - crypted=get_crypt_for_frm(); - *root_ptr= &outparam->mem_root; - outparam->crypted=1; + crypted= get_crypt_for_frm(); + share->crypted= 1; } #endif @@ -304,13 +587,13 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, ((uint2korr(head+14) == 0xffff ? uint4korr(head+47) : uint2korr(head+14)))); - if ((n_length= uint2korr(head+55))) + if ((n_length= uint4korr(head+55))) { /* Read extra data segment */ char *buff, *next_chunk, *buff_end; + DBUG_PRINT("info", ("extra segment size is %u bytes", n_length)); if (!(next_chunk= buff= my_malloc(n_length, MYF(MY_WME)))) goto err; - buff_end= buff + n_length; if (my_pread(file, (byte*)buff, n_length, record_offset + share->reclength, MYF(MY_NABP))) { @@ -318,78 +601,137 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, goto err; } share->connect_string.length= uint2korr(buff); - if (! (share->connect_string.str= strmake_root(&outparam->mem_root, + if (! 
(share->connect_string.str= strmake_root(&share->mem_root, next_chunk + 2, share->connect_string.length))) { my_free(buff, MYF(0)); goto err; } next_chunk+= share->connect_string.length + 2; + buff_end= buff + n_length; if (next_chunk + 2 < buff_end) { uint str_db_type_length= uint2korr(next_chunk); - share->db_type= ha_resolve_by_name(next_chunk + 2, str_db_type_length); - DBUG_PRINT("enter", ("Setting dbtype to: %d - %d - '%.*s'\n", - share->db_type, - str_db_type_length, str_db_type_length, - next_chunk + 2)); + LEX_STRING name= { next_chunk + 2, str_db_type_length }; + handlerton *tmp_db_type= ha_resolve_by_name(thd, &name); + if (tmp_db_type != NULL) + { + share->db_type= tmp_db_type; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + ha_legacy_type(share->db_type))); + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + else + { + if (!strncmp(next_chunk + 2, "partition", str_db_type_length)) + { + /* Use partition handler */ + share->db_type= &partition_hton; + DBUG_PRINT("info", ("setting dbtype to '%.*s' (%d)", + str_db_type_length, next_chunk + 2, + ha_legacy_type(share->db_type))); + } + } +#endif next_chunk+= str_db_type_length + 2; } + if (next_chunk + 5 < buff_end) + { + uint32 partition_info_len = uint4korr(next_chunk); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((share->partition_info_len= partition_info_len)) + { + if (!(share->partition_info= + (uchar*) memdup_root(&share->mem_root, next_chunk + 4, + partition_info_len + 1))) + { + my_free(buff, MYF(0)); + goto err; + } + next_chunk++; + } +#else + if (partition_info_len) + { + DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined")); + my_free(buff, MYF(0)); + goto err; + } +#endif + next_chunk+= 5 + partition_info_len; + } + if (share->mysql_version > 50105 && next_chunk + 5 < buff_end) + { + /* + Partition state was introduced to support partition management in version 5.1.5 + */ + uint32 part_state_len= uint4korr(next_chunk); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if ((share->part_state_len= part_state_len)) + if (!(share->part_state= + (uchar*) memdup_root(&share->mem_root, next_chunk + 4, + part_state_len))) + { + my_free(buff, MYF(0)); + goto err; + } +#else + if (part_state_len) + { + DBUG_PRINT("info", ("WITH_PARTITION_STORAGE_ENGINE is not defined")); + my_free(buff, MYF(0)); + goto err; + } +#endif + next_chunk+= 4 + part_state_len; + } +#ifdef WITH_PARTITION_STORAGE_ENGINE + else + { + share->part_state_len= 0; + share->part_state= NULL; + } +#endif + keyinfo= share->key_info; + for (i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->flags & HA_USES_PARSER) + { + LEX_STRING parser_name; + if (next_chunk >= buff_end) + { + DBUG_PRINT("error", + ("fulltext key uses parser that is not defined in .frm")); + my_free(buff, MYF(0)); + goto err; + } + parser_name.str= next_chunk; + parser_name.length= strlen(next_chunk); + keyinfo->parser= plugin_lock(&parser_name, MYSQL_FTPARSER_PLUGIN); + if (! 
keyinfo->parser) + { + my_free(buff, MYF(0)); + my_error(ER_PLUGIN_IS_NOT_LOADED, MYF(0), parser_name.str); + goto err; + } + } + } my_free(buff, MYF(0)); } - /* Allocate handler */ - if (!(outparam->file= get_new_handler(outparam, &outparam->mem_root, - share->db_type))) - goto err; error=4; - outparam->reginfo.lock_type= TL_UNLOCK; - outparam->current_lock=F_UNLCK; - if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) - records=2; - else - records=1; - if (prgflag & (READ_ALL+EXTRA_RECORD)) - records++; - /* QQ: TODO, remove the +1 from below */ - rec_buff_length= ALIGN_SIZE(share->reclength + 1 + - outparam->file->extra_rec_buf_length()); + extra_rec_buf_length= uint2korr(head+59); + rec_buff_length= ALIGN_SIZE(share->reclength + 1 + extra_rec_buf_length); share->rec_buff_length= rec_buff_length; - if (!(record= (char *) alloc_root(&outparam->mem_root, - rec_buff_length * records))) + if (!(record= (char *) alloc_root(&share->mem_root, + rec_buff_length))) goto err; /* purecov: inspected */ share->default_values= (byte *) record; - if (my_pread(file,(byte*) record, (uint) share->reclength, record_offset, MYF(MY_NABP))) - goto err; /* purecov: inspected */ + goto err; /* purecov: inspected */ - if (records == 1) - { - /* We are probably in hard repair, and the buffers should not be used */ - outparam->record[0]= outparam->record[1]= share->default_values; - } - else - { - outparam->record[0]= (byte *) record+ rec_buff_length; - if (records > 2) - outparam->record[1]= (byte *) record+ rec_buff_length*2; - else - outparam->record[1]= outparam->record[0]; // Safety - } - -#ifdef HAVE_purify - /* - We need this because when we read var-length rows, we are not updating - bytes after end of varchar - */ - if (records > 1) - { - memcpy(outparam->record[0], share->default_values, rec_buff_length); - if (records > 2) - memcpy(outparam->record[1], share->default_values, rec_buff_length); - } -#endif VOID(my_seek(file,pos,MY_SEEK_SET,MYF(0))); if (my_read(file,(byte*) head,288,MYF(MY_NABP))) goto err; @@ -410,12 +752,12 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, int_length= uint2korr(head+274); share->null_fields= uint2korr(head+282); com_length= uint2korr(head+284); - share->comment= strdup_root(&outparam->mem_root, (char*) head+47); + share->comment= strdup_root(&share->mem_root, (char*) head+47); DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d", interval_count,interval_parts, share->keys,n_length,int_length, com_length)); if (!(field_ptr = (Field **) - alloc_root(&outparam->mem_root, + alloc_root(&share->mem_root, (uint) ((share->fields+1)*sizeof(Field*)+ interval_count*sizeof(TYPELIB)+ (share->fields+interval_parts+ @@ -423,7 +765,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, (n_length+int_length+com_length))))) goto err; /* purecov: inspected */ - outparam->field=field_ptr; + share->field= field_ptr; read_length=(uint) (share->fields * field_pack_length + pos+ (uint) (n_length+int_length+com_length)); if (read_string(file,(gptr*) &disk_buff,read_length)) @@ -439,8 +781,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, strpos= disk_buff+pos; share->intervals= (TYPELIB*) (field_ptr+share->fields+1); - int_array= (const char **) (share->intervals+interval_count); - names= (char*) (int_array+share->fields+interval_parts+keys+3); + interval_array= (const char **) (share->intervals+interval_count); + names= (char*) 
(interval_array+share->fields+interval_parts+keys+3); if (!interval_count) share->intervals= 0; // For better debugging memcpy((char*) names, strpos+(share->fields*field_pack_length), @@ -448,8 +790,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, comment_pos= names+(n_length+int_length); memcpy(comment_pos, disk_buff+read_length-com_length, com_length); - fix_type_pointers(&int_array, &share->fieldnames, 1, &names); - fix_type_pointers(&int_array, share->intervals, interval_count, + fix_type_pointers(&interval_array, &share->fieldnames, 1, &names); + fix_type_pointers(&interval_array, share->intervals, interval_count, &names); { @@ -460,7 +802,7 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, interval++) { uint count= (uint) (interval->count + 1) * sizeof(uint); - if (!(interval->type_lengths= (uint *) alloc_root(&outparam->mem_root, + if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root, count))) goto err; for (count= 0; count < interval->count; count++) @@ -470,14 +812,17 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } if (keynames) - fix_type_pointers(&int_array, &share->keynames, 1, &keynames); - VOID(my_close(file,MYF(MY_WME))); - file= -1; + fix_type_pointers(&interval_array, &share->keynames, 1, &keynames); + + /* Allocate handler */ + if (!(handler_file= get_new_handler(share, thd->mem_root, + share->db_type))) + goto err; - record= (char*) outparam->record[0]-1; /* Fieldstart = 1 */ - if (null_field_first) + record= (char*) share->default_values-1; /* Fieldstart = 1 */ + if (share->null_field_first) { - outparam->null_flags=null_pos=(uchar*) record+1; + null_flags= null_pos= (uchar*) record+1; null_bit_pos= (db_create_options & HA_OPTION_PACK_RECORD) ? 
0 : 1; /* null_bytes below is only correct under the condition that @@ -486,13 +831,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, */ share->null_bytes= (share->null_fields + null_bit_pos + 7) / 8; } +#ifndef WE_WANT_TO_SUPPORT_VERY_OLD_FRM_FILES else { share->null_bytes= (share->null_fields+7)/8; - outparam->null_flags= null_pos= - (uchar*) (record+1+share->reclength-share->null_bytes); + null_flags= null_pos= (uchar*) (record + 1 +share->reclength - + share->null_bytes); null_bit_pos= 0; } +#endif use_hash= share->fields >= MAX_FIELDS_BEFORE_HASH; if (use_hash) @@ -607,16 +954,23 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, field_length= my_decimal_precision_to_length(field_length, decimals, f_is_dec(pack_flag) == 0); - sql_print_error("Found incompatible DECIMAL field '%s' in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", share->fieldnames.type_names[i], name, share->table_name); + sql_print_error("Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + share->fieldnames.type_names[i], share->table_name.str, + share->table_name.str); push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, ER_CRASHED_ON_USAGE, - "Found incompatible DECIMAL field '%s' in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", share->fieldnames.type_names[i], name, share->table_name); + "Found incompatible DECIMAL field '%s' in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix it!", + share->fieldnames.type_names[i], + share->table_name.str, + share->table_name.str); share->crashed= 1; // Marker for CHECK TABLE } #endif - *field_ptr=reg_field= - make_field(record+recpos, + *field_ptr= reg_field= + make_field(share, record+recpos, (uint32) field_length, null_pos, null_bit_pos, pack_flag, @@ -627,14 +981,14 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, (interval_nr ? 
share->intervals+interval_nr-1 : (TYPELIB*) 0), - share->fieldnames.type_names[i], - outparam); + share->fieldnames.type_names[i]); if (!reg_field) // Not supported field type { error= 4; goto err; /* purecov: inspected */ } + reg_field->fieldnr= i+1; //Set field number reg_field->field_index= i; reg_field->comment=comment; if (field_type == FIELD_TYPE_BIT && !f_bit_as_char(pack_flag)) @@ -652,12 +1006,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, } if (f_no_default(pack_flag)) reg_field->flags|= NO_DEFAULT_VALUE_FLAG; + if (reg_field->unireg_check == Field::NEXT_NUMBER) - outparam->found_next_number_field= reg_field; - if (outparam->timestamp_field == reg_field) + share->found_next_number_field= field_ptr; + if (share->timestamp_field == reg_field) share->timestamp_field_offset= i; + if (use_hash) - (void) my_hash_insert(&share->name_hash,(byte*) field_ptr); // never fail + (void) my_hash_insert(&share->name_hash, + (byte*) field_ptr); // never fail } *field_ptr=0; // End marker @@ -666,17 +1023,17 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { uint primary_key=(uint) (find_type((char*) primary_key_name, &share->keynames, 3) - 1); - uint ha_option=outparam->file->table_flags(); - keyinfo=outparam->key_info; - key_part=keyinfo->key_part; + uint ha_option= handler_file->table_flags(); + keyinfo= share->key_info; + key_part= keyinfo->key_part; for (uint key=0 ; key < share->keys ; key++,keyinfo++) { - uint usable_parts=0; + uint usable_parts= 0; keyinfo->name=(char*) share->keynames.type_names[key]; /* Fix fulltext keys for old .frm files */ - if (outparam->key_info[key].flags & HA_FULLTEXT) - outparam->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT; + if (share->key_info[key].flags & HA_FULLTEXT) + share->key_info[key].algorithm= HA_KEY_ALG_FULLTEXT; if (primary_key >= MAX_KEY && (keyinfo->flags & HA_NOSAME)) { @@ -689,8 +1046,8 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { uint fieldnr= key_part[i].fieldnr; if (!fieldnr || - outparam->field[fieldnr-1]->null_ptr || - outparam->field[fieldnr-1]->key_length() != + share->field[fieldnr-1]->null_ptr || + share->field[fieldnr-1]->key_length() != key_part[i].length) { primary_key=MAX_KEY; // Can't be used @@ -701,129 +1058,123 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, for (i=0 ; i < keyinfo->key_parts ; key_part++,i++) { + Field *field; if (new_field_pack_flag <= 1) - key_part->fieldnr=(uint16) find_field(outparam, - (uint) key_part->offset, - (uint) key_part->length); -#ifdef EXTRA_DEBUG - if (key_part->fieldnr > share->fields) - goto err; // sanity check -#endif - if (key_part->fieldnr) - { // Should always be true ! 
- Field *field=key_part->field=outparam->field[key_part->fieldnr-1]; - if (field->null_ptr) - { - key_part->null_offset=(uint) ((byte*) field->null_ptr - - outparam->record[0]); - key_part->null_bit= field->null_bit; - key_part->store_length+=HA_KEY_NULL_LENGTH; - keyinfo->flags|=HA_NULL_PART_KEY; - keyinfo->extra_length+= HA_KEY_NULL_LENGTH; - keyinfo->key_length+= HA_KEY_NULL_LENGTH; - } - if (field->type() == FIELD_TYPE_BLOB || - field->real_type() == MYSQL_TYPE_VARCHAR) - { - if (field->type() == FIELD_TYPE_BLOB) - key_part->key_part_flag|= HA_BLOB_PART; - else - key_part->key_part_flag|= HA_VAR_LENGTH_PART; - keyinfo->extra_length+=HA_KEY_BLOB_LENGTH; - key_part->store_length+=HA_KEY_BLOB_LENGTH; - keyinfo->key_length+= HA_KEY_BLOB_LENGTH; - /* - Mark that there may be many matching values for one key - combination ('a', 'a ', 'a '...) - */ - if (!(field->flags & BINARY_FLAG)) - keyinfo->flags|= HA_END_SPACE_KEY; - } - if (field->type() == MYSQL_TYPE_BIT) - key_part->key_part_flag|= HA_BIT_PART; - - if (i == 0 && key != primary_key) - field->flags |= ((keyinfo->flags & HA_NOSAME) && - (keyinfo->key_parts == 1)) ? - UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG; - if (i == 0) - field->key_start.set_bit(key); - if (field->key_length() == key_part->length && - !(field->flags & BLOB_FLAG)) - { - if (outparam->file->index_flags(key, i, 0) & HA_KEYREAD_ONLY) - { - share->keys_for_keyread.set_bit(key); - field->part_of_key.set_bit(key); - } - if (outparam->file->index_flags(key, i, 1) & HA_READ_ORDER) - field->part_of_sortkey.set_bit(key); - } - if (!(key_part->key_part_flag & HA_REVERSE_SORT) && - usable_parts == i) - usable_parts++; // For FILESORT - field->flags|= PART_KEY_FLAG; - if (key == primary_key) - { - field->flags|= PRI_KEY_FLAG; - /* - If this field is part of the primary key and all keys contains - the primary key, then we can use any key to find this column - */ - if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX) - field->part_of_key= share->keys_in_use; - } - if (field->key_length() != key_part->length) - { + key_part->fieldnr= (uint16) find_field(share->field, + (uint) key_part->offset, + (uint) key_part->length); + if (!key_part->fieldnr) + { + error= 4; // Wrong file + goto err; + } + field= key_part->field= share->field[key_part->fieldnr-1]; + if (field->null_ptr) + { + key_part->null_offset=(uint) ((byte*) field->null_ptr - + share->default_values); + key_part->null_bit= field->null_bit; + key_part->store_length+=HA_KEY_NULL_LENGTH; + keyinfo->flags|=HA_NULL_PART_KEY; + keyinfo->extra_length+= HA_KEY_NULL_LENGTH; + keyinfo->key_length+= HA_KEY_NULL_LENGTH; + } + if (field->type() == FIELD_TYPE_BLOB || + field->real_type() == MYSQL_TYPE_VARCHAR) + { + if (field->type() == FIELD_TYPE_BLOB) + key_part->key_part_flag|= HA_BLOB_PART; + else + key_part->key_part_flag|= HA_VAR_LENGTH_PART; + keyinfo->extra_length+=HA_KEY_BLOB_LENGTH; + key_part->store_length+=HA_KEY_BLOB_LENGTH; + keyinfo->key_length+= HA_KEY_BLOB_LENGTH; + /* + Mark that there may be many matching values for one key + combination ('a', 'a ', 'a '...) + */ + if (!(field->flags & BINARY_FLAG)) + keyinfo->flags|= HA_END_SPACE_KEY; + } + if (field->type() == MYSQL_TYPE_BIT) + key_part->key_part_flag|= HA_BIT_PART; + + if (i == 0 && key != primary_key) + field->flags |= (((keyinfo->flags & HA_NOSAME) && + (keyinfo->key_parts == 1)) ? 
+ UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG); + if (i == 0) + field->key_start.set_bit(key); + if (field->key_length() == key_part->length && + !(field->flags & BLOB_FLAG)) + { + if (handler_file->index_flags(key, i, 0) & HA_KEYREAD_ONLY) + { + share->keys_for_keyread.set_bit(key); + field->part_of_key.set_bit(key); + } + if (handler_file->index_flags(key, i, 1) & HA_READ_ORDER) + field->part_of_sortkey.set_bit(key); + } + if (!(key_part->key_part_flag & HA_REVERSE_SORT) && + usable_parts == i) + usable_parts++; // For FILESORT + field->flags|= PART_KEY_FLAG; + if (key == primary_key) + { + field->flags|= PRI_KEY_FLAG; + /* + If this field is part of the primary key and all keys contains + the primary key, then we can use any key to find this column + */ + if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX) + field->part_of_key= share->keys_in_use; + } + if (field->key_length() != key_part->length) + { #ifndef TO_BE_DELETED_ON_PRODUCTION - if (field->type() == FIELD_TYPE_NEWDECIMAL) - { - /* - Fix a fatal error in decimal key handling that causes crashes - on Innodb. We fix it by reducing the key length so that - InnoDB never gets a too big key when searching. - This allows the end user to do an ALTER TABLE to fix the - error. - */ - keyinfo->key_length-= (key_part->length - field->key_length()); - key_part->store_length-= (uint16)(key_part->length - - field->key_length()); - key_part->length= (uint16)field->key_length(); - sql_print_error("Found wrong key definition in %s; Please do \"ALTER TABLE '%s' FORCE \" to fix it!", name, share->table_name); - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, - ER_CRASHED_ON_USAGE, - "Found wrong key definition in %s; Please do \"ALTER TABLE '%s' FORCE\" to fix it!", name, share->table_name); - - share->crashed= 1; // Marker for CHECK TABLE - goto to_be_deleted; - } + if (field->type() == FIELD_TYPE_NEWDECIMAL) + { + /* + Fix a fatal error in decimal key handling that causes crashes + on Innodb. We fix it by reducing the key length so that + InnoDB never gets a too big key when searching. + This allows the end user to do an ALTER TABLE to fix the + error. + */ + keyinfo->key_length-= (key_part->length - field->key_length()); + key_part->store_length-= (uint16)(key_part->length - + field->key_length()); + key_part->length= (uint16)field->key_length(); + sql_print_error("Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE \" to fix it!", + share->table_name.str, + share->table_name.str); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, + ER_CRASHED_ON_USAGE, + "Found wrong key definition in %s; " + "Please do \"ALTER TABLE '%s' FORCE\" to fix " + "it!", + share->table_name.str, + share->table_name.str); + share->crashed= 1; // Marker for CHECK TABLE + goto to_be_deleted; + } #endif - key_part->key_part_flag|= HA_PART_KEY_SEG; - if (!(field->flags & BLOB_FLAG)) - { // Create a new field - field=key_part->field=field->new_field(&outparam->mem_root, - outparam); - field->field_length=key_part->length; - } - } + key_part->key_part_flag|= HA_PART_KEY_SEG; + } to_be_deleted: - /* - If the field can be NULL, don't optimize away the test - key_part_column = expression from the WHERE clause - as we need to test for NULL = NULL. 
- */ - if (field->real_maybe_null()) - key_part->key_part_flag|= HA_PART_KEY_SEG; - } - else - { // Error: shorten key - keyinfo->key_parts=usable_parts; - keyinfo->flags=0; - } + /* + If the field can be NULL, don't optimize away the test + key_part_column = expression from the WHERE clause + as we need to test for NULL = NULL. + */ + if (field->real_maybe_null()) + key_part->key_part_flag|= HA_PART_KEY_SEG; } - keyinfo->usable_key_parts=usable_parts; // Filesort + keyinfo->usable_key_parts= usable_parts; // Filesort set_if_bigger(share->max_key_length,keyinfo->key_length+ keyinfo->key_parts); @@ -844,11 +1195,15 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, If we are using an integer as the primary key then allow the user to refer to it as '_rowid' */ - if (outparam->key_info[primary_key].key_parts == 1) + if (share->key_info[primary_key].key_parts == 1) { - Field *field= outparam->key_info[primary_key].key_part[0].field; + Field *field= share->key_info[primary_key].key_part[0].field; if (field && field->result_type() == INT_RESULT) - outparam->rowid_field=field; + { + /* note that fieldnr here (and rowid_field_offset) starts from 1 */ + share->rowid_field_offset= (share->key_info[primary_key].key_part[0]. + fieldnr); + } } } else @@ -862,21 +1217,30 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, { /* Old file format with default as not null */ uint null_length= (share->null_fields+7)/8; - bfill(share->default_values + (outparam->null_flags - (uchar*) record), + bfill(share->default_values + (null_flags - (uchar*) record), null_length, 255); } - if ((reg_field=outparam->found_next_number_field)) + if (share->found_next_number_field) { + /* + We must have a table object for find_ref_key to calculate field offset + */ + TABLE tmp_table; + tmp_table.record[0]= share->default_values; + + reg_field= *share->found_next_number_field; + reg_field->table= &tmp_table; if ((int) (share->next_number_index= (uint) - find_ref_key(outparam,reg_field, + find_ref_key(share->key_info, share->keys, reg_field, &share->next_number_key_offset)) < 0) { - reg_field->unireg_check=Field::NONE; /* purecov: inspected */ - outparam->found_next_number_field=0; + reg_field->unireg_check= Field::NONE; /* purecov: inspected */ + share->found_next_number_field= 0; } else - reg_field->flags|=AUTO_INCREMENT_FLAG; + reg_field->flags |= AUTO_INCREMENT_FLAG; + reg_field->table= 0; } if (share->blob_fields) @@ -886,10 +1250,10 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, /* Store offsets to blob fields to find them fast */ if (!(share->blob_field= save= - (uint*) alloc_root(&outparam->mem_root, + (uint*) alloc_root(&share->mem_root, (uint) (share->blob_fields* sizeof(uint))))) goto err; - for (i=0, ptr= outparam->field ; *ptr ; ptr++, i++) + for (i=0, ptr= share->field ; *ptr ; ptr++, i++) { if ((*ptr)->flags & BLOB_FLAG) (*save++)= i; @@ -900,18 +1264,237 @@ int openfrm(THD *thd, const char *name, const char *alias, uint db_stat, the correct null_bytes can now be set, since bitfields have been taken into account */ - share->null_bytes= (null_pos - (uchar*) outparam->null_flags + + share->null_bytes= (null_pos - (uchar*) null_flags + (null_bit_pos + 7) / 8); share->last_null_bit_pos= null_bit_pos; + share->db_low_byte_first= handler_file->low_byte_first(); + delete handler_file; +#ifndef DBUG_OFF + if (use_hash) + (void) hash_check(&share->name_hash); +#endif + DBUG_RETURN (0); + + err: + share->error= error; + share->open_errno= 
my_errno; + share->errarg= errarg; + x_free((gptr) disk_buff); + delete crypted; + delete handler_file; + hash_free(&share->name_hash); + + open_table_error(share, error, share->open_errno, errarg); + DBUG_RETURN(error); +} /* open_binary_frm */ + + +/* + Open a table based on a TABLE_SHARE + + SYNOPSIS + open_table_from_share() + thd Thread handler + share Table definition + alias Alias for table + db_stat open flags (for example HA_OPEN_KEYFILE| + HA_OPEN_RNDFILE..) can be 0 (example in + ha_example_table) + prgflag READ_ALL etc.. + ha_open_flags HA_OPEN_ABORT_IF_LOCKED etc.. + outparam result table + + RETURN VALUES + 0 ok + 1 Error (see open_table_error) + 2 Error (see open_table_error) + 3 Wrong data in .frm file + 4 Error (see open_table_error) + 5 Error (see open_table_error: charset unavailable) + 7 Table definition has changed in engine +*/ + +int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias, + uint db_stat, uint prgflag, uint ha_open_flags, + TABLE *outparam, bool is_create_table) +{ + int error; + uint records, i; + bool error_reported= FALSE; + byte *record; + Field **field_ptr; + MEM_ROOT **root_ptr, *old_root; + DBUG_ENTER("open_table_from_share"); + DBUG_PRINT("enter",("name: '%s.%s' form: 0x%lx", share->db.str, + share->table_name.str, outparam)); + + error= 1; + root_ptr= my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); + old_root= *root_ptr; + bzero((char*) outparam, sizeof(*outparam)); + outparam->in_use= thd; + outparam->s= share; + outparam->db_stat= db_stat; + outparam->write_row_record= NULL; + + init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + *root_ptr= &outparam->mem_root; + + if (!(outparam->alias= my_strdup(alias, MYF(MY_WME)))) + goto err; + outparam->quick_keys.init(); + outparam->used_keys.init(); + outparam->keys_in_use_for_query.init(); + + /* Allocate handler */ + if (!(outparam->file= get_new_handler(share, &outparam->mem_root, + share->db_type))) + goto err; + + error= 4; + outparam->reginfo.lock_type= TL_UNLOCK; + outparam->current_lock= F_UNLCK; + records=0; + if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) + records=1; + if (prgflag & (READ_ALL+EXTRA_RECORD)) + records++; + + if (!(record= (byte*) alloc_root(&outparam->mem_root, + share->rec_buff_length * records))) + goto err; /* purecov: inspected */ + + if (records == 0) + { + /* We are probably in hard repair, and the buffers should not be used */ + outparam->record[0]= outparam->record[1]= share->default_values; + } + else + { + outparam->record[0]= record; + if (records > 1) + outparam->record[1]= record+ share->rec_buff_length; + else + outparam->record[1]= outparam->record[0]; // Safety + } + +#ifdef HAVE_purify + /* + We need this because when we read var-length rows, we are not updating + bytes after end of varchar + */ + if (records > 1) + { + memcpy(outparam->record[0], share->default_values, share->rec_buff_length); + if (records > 2) + memcpy(outparam->record[1], share->default_values, + share->rec_buff_length); + } +#endif + + if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, + (uint) ((share->fields+1)* + sizeof(Field*))))) + goto err; /* purecov: inspected */ + + outparam->field= field_ptr; + + record= (byte*) outparam->record[0]-1; /* Fieldstart = 1 */ + if (share->null_field_first) + outparam->null_flags= (uchar*) record+1; + else + outparam->null_flags= (uchar*) (record+ 1+ share->reclength - + share->null_bytes); + + /* Setup copy of fields from share, but use the right alias and record */ + for (i=0 ; i < 
share->fields; i++, field_ptr++) + { + if (!((*field_ptr)= share->field[i]->clone(&outparam->mem_root, outparam))) + goto err; + } + (*field_ptr)= 0; // End marker + + if (share->found_next_number_field) + outparam->found_next_number_field= + outparam->field[(uint) (share->found_next_number_field - share->field)]; + if (share->timestamp_field) + outparam->timestamp_field= (Field_timestamp*) outparam->field[share->timestamp_field_offset]; + + + /* Fix key->name and key_part->field */ + if (share->key_parts) + { + KEY *key_info, *key_info_end; + KEY_PART_INFO *key_part; + uint n_length; + n_length= share->keys*sizeof(KEY) + share->key_parts*sizeof(KEY_PART_INFO); + if (!(key_info= (KEY*) alloc_root(&outparam->mem_root, n_length))) + goto err; + outparam->key_info= key_info; + key_part= (my_reinterpret_cast(KEY_PART_INFO*) (key_info+share->keys)); + + memcpy(key_info, share->key_info, sizeof(*key_info)*share->keys); + memcpy(key_part, share->key_info[0].key_part, (sizeof(*key_part) * + share->key_parts)); + + for (key_info_end= key_info + share->keys ; + key_info < key_info_end ; + key_info++) + { + KEY_PART_INFO *key_part_end; + + key_info->table= outparam; + key_info->key_part= key_part; + + for (key_part_end= key_part+ key_info->key_parts ; + key_part < key_part_end ; + key_part++) + { + Field *field= key_part->field= outparam->field[key_part->fieldnr-1]; + + if (field->key_length() != key_part->length && + !(field->flags & BLOB_FLAG)) + { + /* + We are using only a prefix of the column as a key: + Create a new field for the key part that matches the index + */ + field= key_part->field=field->new_field(&outparam->mem_root, + outparam); + field->field_length= key_part->length; + } + } + } + } + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (share->partition_info_len) + { + if (mysql_unpack_partition(thd, share->partition_info, + share->partition_info_len, + (uchar*)share->part_state, + share->part_state_len, + outparam, is_create_table, + share->default_part_db_type)) + goto err; + /* + Fix the partition functions and ensure they are not constant + functions + */ + if (fix_partition_func(thd, share->normalized_path.str, outparam, + is_create_table)) + goto err; + } +#endif + /* The table struct is now initialized; Open the table */ - error=2; + error= 2; if (db_stat) { int ha_err; - unpack_filename(index_file,index_file); if ((ha_err= (outparam->file-> - ha_open(index_file, + ha_open(outparam, share->normalized_path.str, (db_stat & HA_READ_ONLY ? O_RDONLY : O_RDWR), (db_stat & HA_OPEN_TEMPORARY ? 
HA_OPEN_TMP_TABLE : ((db_stat & HA_WAIT_IF_LOCKED) || @@ -928,8 +1511,10 @@ if (ha_err == HA_ERR_NO_SUCH_TABLE) { - /* The table did not exists in storage engine, use same error message - as if the .frm file didn't exist */ + /* + The table did not exist in the storage engine; use the same error + message as if the .frm file didn't exist + */ error= 1; my_errno= ENOENT; } @@ -937,47 +1522,53 @@ { outparam->file->print_error(ha_err, MYF(0)); error_reported= TRUE; + if (ha_err == HA_ERR_TABLE_DEF_CHANGED) + error= 7; } goto err; /* purecov: inspected */ } } - share->db_low_byte_first= outparam->file->low_byte_first(); *root_ptr= old_root; thd->status_var.opened_tables++; -#ifndef DBUG_OFF - if (use_hash) - (void) hash_check(&share->name_hash); -#endif + DBUG_RETURN (0); err: - x_free((gptr) disk_buff); - if (file > 0) - VOID(my_close(file,MYF(MY_WME))); - - delete crypted; *root_ptr= old_root; if (! error_reported) - frm_error(error,outparam,name,ME_ERROR+ME_WAITTANG, errarg); + open_table_error(share, error, my_errno, 0); delete outparam->file; - outparam->file=0; // For easier errorchecking +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->part_info) + free_items(outparam->part_info->item_free_list); +#endif + outparam->file= 0; // For easier error checking outparam->db_stat=0; - hash_free(&share->name_hash); free_root(&outparam->mem_root, MYF(0)); // Safe to call on bzero'd root my_free((char*) outparam->alias, MYF(MY_ALLOW_ZERO_PTR)); DBUG_RETURN (error); -} /* openfrm */ - +} /* close a .frm file and its tables */ -int closefrm(register TABLE *table) +int closefrm(register TABLE *table, bool free_share) { int error=0; + uint idx; + KEY *key_info; DBUG_ENTER("closefrm"); if (table->db_stat) error=table->file->close(); + key_info= table->key_info; + for (idx= table->s->keys; idx; idx--, key_info++) + { + if (key_info->flags & HA_USES_PARSER) + { + plugin_unlock(key_info->parser); + key_info->flags= 0; + } + } my_free((char*) table->alias, MYF(MY_ALLOW_ZERO_PTR)); table->alias= 0; if (table->field) @@ -988,7 +1579,21 @@ int closefrm(register TABLE *table) } delete table->file; table->file= 0; /* For easier errorchecking */ - hash_free(&table->s->name_hash); +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->part_info) + { + free_items(table->part_info->item_free_list); + table->part_info->item_free_list= 0; + table->part_info= 0; + } +#endif + if (free_share) + { + if (table->s->tmp_table == NO_TMP_TABLE) + release_table_share(table->s, RELEASE_NORMAL); + else + free_table_share(table->s); + } free_root(&table->mem_root, MYF(0)); DBUG_RETURN(error); } @@ -1147,38 +1752,44 @@ ulong make_new_entry(File file, uchar *fileinfo, TYPELIB *formnames, /* error message when opening a form file */ -static void frm_error(int error, TABLE *form, const char *name, - myf errortype, int errarg) +void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg) { int err_no; char buff[FN_REFLEN]; - const char *form_dev="",*datext; - const char *real_name= (char*) name+dirname_length(name); - DBUG_ENTER("frm_error"); + myf errortype= ME_ERROR+ME_WAITTANG; + DBUG_ENTER("open_table_error"); switch (error) { + case 7: case 1: - if (my_errno == ENOENT) + if (db_errno == ENOENT) + my_error(ER_NO_SUCH_TABLE, MYF(0), share->db.str, share->table_name.str); + else { - char *db; - uint length=dirname_part(buff,name); - buff[length-1]=0; - 
db=buff+dirname_length(buff); - my_error(ER_NO_SUCH_TABLE, MYF(0), db, real_name); + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error((db_errno == EMFILE) ? ER_CANT_OPEN_FILE : ER_FILE_NOT_FOUND, + errortype, buff, db_errno); } - else - my_error((my_errno == EMFILE) ? ER_CANT_OPEN_FILE : ER_FILE_NOT_FOUND, - errortype, - fn_format(buff, name, form_dev, reg_ext, 0), my_errno); break; case 2: { - datext= form->file ? *form->file->bas_ext() : ""; - datext= datext==NullS ? "" : datext; - err_no= (my_errno == ENOENT) ? ER_FILE_NOT_FOUND : (my_errno == EAGAIN) ? + handler *file= 0; + const char *datext= ""; + + if (share->db_type != NULL) + { + if ((file= get_new_handler(share, current_thd->mem_root, + share->db_type))) + { + if (!(datext= *file->bas_ext())) + datext= ""; + } + } + err_no= (db_errno == ENOENT) ? ER_FILE_NOT_FOUND : (db_errno == EAGAIN) ? ER_FILE_USED : ER_CANT_OPEN_FILE; - my_error(err_no,errortype, - fn_format(buff,real_name,form_dev,datext,2),my_errno); + strxmov(buff, share->normalized_path.str, datext, NullS); + my_error(err_no,errortype, buff, db_errno); + delete file; break; } case 5: @@ -1192,23 +1803,24 @@ static void frm_error(int error, TABLE *form, const char *name, } my_printf_error(ER_UNKNOWN_COLLATION, "Unknown collation '%s' in table '%-.64s' definition", - MYF(0), csname, real_name); + MYF(0), csname, share->table_name.str); break; } case 6: + strxmov(buff, share->normalized_path.str, reg_ext, NullS); my_printf_error(ER_NOT_FORM_FILE, "Table '%-.64s' was created with a different version " - "of MySQL and cannot be read", - MYF(0), name); + "of MySQL and cannot be read", + MYF(0), buff); break; default: /* Better wrong error than none */ case 4: - my_error(ER_NOT_FORM_FILE, errortype, - fn_format(buff, name, form_dev, reg_ext, 0)); + strxmov(buff, share->normalized_path.str, reg_ext, NullS); + my_error(ER_NOT_FORM_FILE, errortype, buff, 0); break; } DBUG_VOID_RETURN; -} /* frm_error */ +} /* open_table_error */ /* @@ -1288,22 +1900,21 @@ TYPELIB *typelib(MEM_ROOT *mem_root, List<String> &strings) # field number +1 */ -static uint find_field(TABLE *form,uint start,uint length) +static uint find_field(Field **fields, uint start, uint length) { Field **field; - uint i, pos, fields; + uint i, pos; - pos=0; - fields= form->s->fields; - for (field=form->field, i=1 ; i<= fields ; i++,field++) + pos= 0; + for (field= fields, i=1 ; *field ; i++,field++) { if ((*field)->offset() == start) { if ((*field)->key_length() == length) return (i); - if (!pos || form->field[pos-1]->pack_length() < + if (!pos || fields[pos-1]->pack_length() < (*field)->pack_length()) - pos=i; + pos= i; } } return (pos); @@ -1393,13 +2004,15 @@ void append_unescaped(String *res, const char *pos, uint length) res->append('\''); } + /* Create a .frm file */ -File create_frm(THD *thd, my_string name, const char *db, +File create_frm(THD *thd, const char *name, const char *db, const char *table, uint reclength, uchar *fileinfo, - HA_CREATE_INFO *create_info, uint keys) + HA_CREATE_INFO *create_info, uint keys) { register File file; + uint key_length; ulong length; char fill[IO_SIZE]; int create_flags= O_RDWR | O_TRUNC; @@ -1413,12 +2026,6 @@ File create_frm(THD *thd, my_string name, const char *db, if (create_info->min_rows > UINT_MAX32) create_info->min_rows= UINT_MAX32; - /* - Ensure that raid_chunks can't be larger than 255, as this would cause - problems with drop database - */ - set_if_smaller(create_info->raid_chunks, 255); - if ((file= my_create(name, CREATE_MODE, create_flags, 
MYF(0))) >= 0) { uint key_length, tmp_key_length; @@ -1429,7 +2036,8 @@ File create_frm(THD *thd, my_string name, const char *db, fileinfo[1]= 1; fileinfo[2]= FRM_VER+3+ test(create_info->varchar); - fileinfo[3]= (uchar) ha_checktype(thd,create_info->db_type,0,0); + fileinfo[3]= (uchar) ha_legacy_type( + ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0)); fileinfo[4]=1; int2store(fileinfo+6,IO_SIZE); /* Next block starts here */ key_length=keys*(7+NAME_LEN+MAX_REF_PARTS*9)+16; @@ -1450,13 +2058,17 @@ File create_frm(THD *thd, my_string name, const char *db, fileinfo[38]= (create_info->default_table_charset ? create_info->default_table_charset->number : 0); fileinfo[40]= (uchar) create_info->row_type; - fileinfo[41]= (uchar) create_info->raid_type; - fileinfo[42]= (uchar) create_info->raid_chunks; - int4store(fileinfo+43,create_info->raid_chunksize); + /* Next few bytes were for RAID support */ + fileinfo[41]= 0; + fileinfo[42]= 0; + fileinfo[43]= 0; + fileinfo[44]= 0; + fileinfo[45]= 0; + fileinfo[46]= 0; int4store(fileinfo+47, key_length); tmp= MYSQL_VERSION_ID; // Store to avoid warning from int4store int4store(fileinfo+51, tmp); - int2store(fileinfo+55, create_info->extra_size); + int4store(fileinfo+55, create_info->extra_size); bzero(fill,IO_SIZE); for (; length > IO_SIZE ; length-= IO_SIZE) { @@ -1489,9 +2101,6 @@ void update_create_info_from_table(HA_CREATE_INFO *create_info, TABLE *table) create_info->table_options= share->db_create_options; create_info->avg_row_length= share->avg_row_length; create_info->row_type= share->row_type; - create_info->raid_type= share->raid_type; - create_info->raid_chunks= share->raid_chunks; - create_info->raid_chunksize= share->raid_chunksize; create_info->default_table_charset= share->table_charset; create_info->table_charset= 0; @@ -1611,9 +2220,6 @@ bool check_db_name(char *name) #else last_char_is_space= *name==' '; #endif - if (*name == '/' || *name == '\\' || *name == FN_LIBCHAR || - *name == FN_EXTCHAR) - return 1; name++; } return last_char_is_space || (uint) (name - start) > NAME_LEN; @@ -1622,8 +2228,7 @@ bool check_db_name(char *name) /* Allow anything as a table name, as long as it doesn't contain an - a '/', or a '.' character - or ' ' at the end + ' ' at the end returns 1 on error */ @@ -1654,8 +2259,6 @@ bool check_table_name(const char *name, uint length) } } #endif - if (*name == '/' || *name == '\\' || *name == FN_EXTCHAR) - return 1; name++; } #if defined(USE_MB) && defined(USE_MB_IDENT) @@ -1696,6 +2299,144 @@ bool check_column_name(const char *name) return last_char_is_space || (uint) (name - start) > NAME_LEN; } + +/* + Checks whether a table is intact. Should be done *just* after the table has + been opened. + + Synopsis + table_check_intact() + table - the table to check + table_f_count - expected number of columns in the table + table_def - expected structure of the table (column name and type) + last_create_time- the table->file->create_time of the table in memory + we have checked last time + error_num - ER_XXXX from the error messages file. When 0 no error + is sent to the client in case types does not match. 
+
+/*
+  Checks whether a table is intact. Should be done *just* after the table has
+  been opened.
+
+  Synopsis
+    table_check_intact()
+      table             - the table to check
+      table_f_count     - expected number of columns in the table
+      table_def         - expected structure of the table (column name and
+                          type)
+      last_create_time  - the table->file->create_time of the table in memory
+                          that we checked last time
+      error_num         - ER_XXXX from the error messages file. When 0, no
+                          error is sent to the client if the types do not
+                          match. If the column count differs, either
+                          ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE or
+                          ER_COL_COUNT_DOESNT_MATCH_CORRUPTED is used
+
+  RETURNS
+    0 - OK
+    1 - There was an error
+*/
+
+my_bool
+table_check_intact(TABLE *table, uint table_f_count,
+                   TABLE_FIELD_W_TYPE *table_def, time_t *last_create_time,
+                   int error_num)
+{
+  uint i;
+  my_bool error= FALSE;
+  my_bool fields_diff_count;
+  DBUG_ENTER("table_check_intact");
+  DBUG_PRINT("info",("table=%s expected_count=%d",table->alias, table_f_count));
+  DBUG_PRINT("info",("last_create_time=%d", *last_create_time));
+
+  if ((fields_diff_count= (table->s->fields != table_f_count)) ||
+      (*last_create_time != table->file->create_time))
+  {
+    DBUG_PRINT("info", ("I am suspecting, checking table"));
+    if (fields_diff_count)
+    {
+      // The table was created by a previous MySQL version
+      error= TRUE;
+      if (MYSQL_VERSION_ID > table->s->mysql_version)
+        my_error(ER_COL_COUNT_DOESNT_MATCH_PLEASE_UPDATE, MYF(0), table->alias,
+                 table_f_count, table->s->fields, table->s->mysql_version,
+                 MYSQL_VERSION_ID);
+      else if (MYSQL_VERSION_ID == table->s->mysql_version)
+        my_error(ER_COL_COUNT_DOESNT_MATCH_CORRUPTED,MYF(0), table->alias,
+                 table_f_count, table->s->fields);
+      else
+        /*
+          Moving from a newer mysql to an older one -> not an error, but we
+          will check the definition afterwards. If a column was added at the
+          end, we don't care much since it's not in the middle.
+        */
+        error= FALSE;
+    }
+    // Definitely something has changed
+    char buffer[255];
+    for (i=0 ;i < table_f_count; ++i, ++table_def)
+    {
+      Field *field= table->field[i];
+      String sql_type(buffer, sizeof(buffer), system_charset_info);
+      sql_type.length(0);
+      /*
+        Name changes are not fatal (we match by sequence number), but a
+        mismatch can indicate a tampered or broken table.
+      */
+      if (!fields_diff_count || i < table->s->fields)
+      {
+        if (strncmp(field->field_name, table_def->name.str,
+                    table_def->name.length))
+        {
+          sql_print_error("(%s) Expected field %s at position %d, found %s",
+                          table->alias, table_def->name.str, i,
+                          field->field_name);
+        }
+
+        /*
+          If the type does not match, then something is really wrong.
+          Check up to length - 1. Why?
+          1. datetime -> datetim              -> the same
+          2. int(11) -> int(11                -> the same
+          3. set('one','two') -> set('one','two'
+          So for sets, if the same prefix is there, it's OK if more elements
+          are added as part of the set. The same holds for enum. So a new
+          table running on an old server will be valid.
+        */
+        field->sql_type(sql_type);
+        if (strncmp(sql_type.c_ptr(), table_def->type.str,
+                    table_def->type.length - 1))
+        {
+          sql_print_error("(%s) Expected field %s at position %d to have type "
+                          "%s, found %s", table->alias, table_def->name.str,
+                          i, table_def->type.str, sql_type.c_ptr());
+          error= TRUE;
+        }
+        else if (table_def->cset.str && !field->has_charset())
+        {
+          sql_print_error("(%s) Expected field %s at position %d to have "
+                          "character set '%s' but found none", table->alias,
+                          table_def->name.str, i, table_def->cset.str);
+          error= TRUE;
+        }
+        else if (table_def->cset.str &&
+                 strcmp(field->charset()->csname, table_def->cset.str))
+        {
+          sql_print_error("(%s) Expected field %s at position %d to have "
+                          "character set '%s' but found '%s'", table->alias,
+                          table_def->name.str, i, table_def->cset.str,
+                          field->charset()->csname);
+          error= TRUE;
+        }
+      }
+      else
+      {
+        sql_print_error("(%s) Expected field %s at position %d to have type %s "
+                        "but no field found.", table->alias,
+                        table_def->name.str, i, table_def->type.str);
+        error= TRUE;
+      }
+    }
+    if (!error)
+      *last_create_time= table->file->create_time;
+    else if (!fields_diff_count && error_num)
+      my_error(error_num,MYF(0), table->alias, table_f_count, table->s->fields);
+  }
+  else
+  {
+    DBUG_PRINT("info", ("Table seems ok without thorough checking."));
+    *last_create_time= table->file->create_time;
+  }
+
+  DBUG_RETURN(error);
+}
+
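For readers following the new API: table_check_intact() is driven by an array of TABLE_FIELD_W_TYPE entries (the struct is declared in the sql/table.h hunk further down). A hedged usage sketch; the table name, column definitions and the static create_time cache below are illustrative, not from this changeset:

```cpp
/* Expected layout of a hypothetical system table, mysql.example. */
static TABLE_FIELD_W_TYPE example_table_fields[]=
{
  /* name               type                    cset        */
  { { (char*) "id",   2 }, { (char*) "int(11)",  7 }, { 0, 0 } },
  { { (char*) "name", 4 }, { (char*) "char(64)", 8 }, { (char*) "utf8", 4 } }
};

static time_t example_last_create_time= 0L;

/* Returns TRUE if 'table' no longer matches the compiled-in definition.
   Passing error_num == 0 logs mismatches without raising a client error. */
my_bool example_table_intact(TABLE *table)
{
  return table_check_intact(table, 2, example_table_fields,
                            &example_last_create_time, 0);
}
```

Note the prefix comparison documented above: because only type.length - 1 characters are compared, an enum or set that gained trailing elements still passes the check on an older server.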

/*
  Create Item_field for each column in the table.
@@ -2601,9 +3342,9 @@ const char *Natural_join_column::db_name()
    are inconsistent in this respect.
  */
  DBUG_ASSERT(!strcmp(table_ref->db,
-                      table_ref->table->s->db) ||
+                      table_ref->table->s->db.str) ||
              (table_ref->schema_table &&
-               table_ref->table->s->db[0] == 0));
+               table_ref->table->s->db.str[0] == 0));
  return table_ref->db;
}

@@ -2793,7 +3534,7 @@ const char *Field_iterator_table_ref::table_name()
    return natural_join_it.column_ref()->table_name();

  DBUG_ASSERT(!strcmp(table_ref->table_name,
-                      table_ref->table->s->table_name));
+                      table_ref->table->s->table_name.str));
  return table_ref->table_name;
}

@@ -2810,9 +3551,9 @@ const char *Field_iterator_table_ref::db_name()
    ensure consistency. An exception are I_S schema tables, which
    are inconsistent in this respect.
  */
-  DBUG_ASSERT(!strcmp(table_ref->db, table_ref->table->s->db) ||
+  DBUG_ASSERT(!strcmp(table_ref->db, table_ref->table->s->db.str) ||
              (table_ref->schema_table &&
-               table_ref->table->s->db[0] == 0));
+               table_ref->table->s->db.str[0] == 0));

  return table_ref->db;
}
diff --git a/sql/table.h b/sql/table.h
index cd511e7b5ee..2bde68e5919 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -21,6 +21,7 @@ class Item;				/* Needed by ORDER */
 class GRANT_TABLE;
 class st_select_lex_unit;
 class st_select_lex;
+class partition_info;
 class COND_EQUAL;
 class Security_context;

@@ -55,8 +56,11 @@ typedef struct st_grant_info
   ulong orig_want_privilege;
 } GRANT_INFO;

-enum tmp_table_type {NO_TMP_TABLE=0, TMP_TABLE=1, TRANSACTIONAL_TMP_TABLE=2,
-		     SYSTEM_TMP_TABLE=3};
+enum tmp_table_type
+{
+  NO_TMP_TABLE, TMP_TABLE, TRANSACTIONAL_TMP_TABLE,
+  INTERNAL_TMP_TABLE, SYSTEM_TMP_TABLE
+};

 enum frm_type_enum
 {
@@ -65,6 +69,8 @@ enum frm_type_enum
   FRMTYPE_VIEW
 };

+enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP };
+
 typedef struct st_filesort_info
 {
   IO_CACHE *io_cache;			/* If sorted through filebyte */
@@ -112,43 +118,49 @@ typedef struct st_table_share
   TYPELIB keynames;			/* Pointers to keynames */
   TYPELIB fieldnames;			/* Pointer to fieldnames */
   TYPELIB *intervals;			/* pointer to interval info */
-#ifdef NOT_YET
   pthread_mutex_t mutex;		/* For locking the share */
   pthread_cond_t cond;			/* To signal that share is ready */
+  struct st_table_share *next,		/* Link to unused shares */
+    **prev;
+#ifdef NOT_YET
   struct st_table *open_tables;		/* link to open tables */
-  struct st_table *used_next,		/* Link to used tables */
-	 **used_prev;
+#endif
+
+  /* The following is copied to each TABLE on OPEN */
   Field **field;
+  Field **found_next_number_field;
+  Field *timestamp_field;		/* Used only during open */
   KEY  *key_info;			/* data of keys in database */
-#endif
   uint	*blob_field;			/* Index to blobs in Field array */
+
+  byte	*default_values;		/* row with default values */
   char	*comment;			/* Comment about table */
   CHARSET_INFO *table_charset;		/* Default charset of string fields */

   /* A pair "database_name\0table_name\0", widely used as simply a db name */
-  char	*table_cache_key;
-  const char *db;			/* Pointer to db */
-  const char *table_name;		/* Table name (for open) */
-  const char *path;			/* Path to .frm file (from datadir) */
+  LEX_STRING table_cache_key;
+  LEX_STRING db;			/* Pointer to db */
+  LEX_STRING table_name;		/* Table name (for open) */
+  LEX_STRING path;			/* Path to .frm file (from datadir) */
+  LEX_STRING normalized_path;		/* unpack_filename(path) */
   LEX_STRING connect_string;
   key_map keys_in_use;			/* Keys in use for table */
   key_map keys_for_keyread;
+  ha_rows min_rows, max_rows;		/* create information */
   ulong   avg_row_length;		/* create information */
   ulong   raid_chunksize;
   ulong   version, flush_version, mysql_version;
   ulong   timestamp_offset;		/* Set to offset+1 of record */
   ulong   reclength;			/* Recordlength */

-  ha_rows min_rows, max_rows;		/* create information */
-  enum db_type db_type;			/* table_type for handler */
+  handlerton *db_type;			/* table_type for handler */
   enum row_type row_type;		/* How rows are stored */
   enum tmp_table_type tmp_table;

+  uint ref_count;			/* How many TABLE objects use this */
+  uint open_count;			/* Number of tables in open list */
   uint blob_ptr_size;			/* 4 or 8 */
   uint null_bytes, last_null_bit_pos;
-  uint key_length;			/* Length of table_cache_key */
   uint fields;				/* Number of fields */
   uint rec_buff_length;			/* Size of table->record[] buffer */
   uint keys, key_parts;
@@ -156,31 +168,48 @@ typedef struct st_table_share
   uint uniques;				/* Number of UNIQUE index */
   uint null_fields;			/* number of null fields */
   uint blob_fields;			/* number of blob fields */
+  uint timestamp_field_offset;		/* Field number for timestamp field */
   uint varchar_fields;			/* number of varchar fields */
   uint db_create_options;		/* Create options from database */
   uint db_options_in_use;		/* Options in use */
   uint db_record_offset;		/* if HA_REC_IN_SEQ */
   uint raid_type, raid_chunks;
-  uint open_count;			/* Number of tables in open list */
+  uint rowid_field_offset;		/* Field_nr +1 to rowid field */
   /* Index of auto-updated TIMESTAMP field in field array */
   uint primary_key;
-  uint timestamp_field_offset;
   uint next_number_index;
   uint next_number_key_offset;
-  uchar	frm_version;
-  my_bool system;			/* Set if system record */
-  my_bool crypted;			/* If .frm file is crypted */
-  my_bool db_low_byte_first;		/* Portable row format */
-  my_bool crashed;
-  my_bool is_view;
-  my_bool name_lock, replace_with_name_lock;
+  uint error, open_errno, errarg;	/* error from open_table_def() */
+  uchar frm_version;
+  bool null_field_first;
+  bool system;				/* Set if system table (one record) */
+  bool crypted;				/* If .frm file is crypted */
+  bool db_low_byte_first;		/* Portable row format */
+  bool crashed;
+  bool is_view;
+  bool name_lock, replace_with_name_lock;
+  bool waiting_on_cond;			/* Protection against free */
+  ulong table_map_id;			/* for row-based replication */
+  ulonglong table_map_version;
   /*
     TRUE if this is a system table like 'mysql.proc', which we want to be
     able to open and lock even when we already have some tables open and
     locked. To avoid deadlocks we have to put certain restrictions on
     locking of this table for writing. FALSE - otherwise.
   */
-  my_bool system_table;
+  bool system_table;
+  /*
+    This flag is set for the log tables. Used during FLUSH instances to skip
+    log tables, while closing tables (since logs must be always available)
+  */
+  bool log_table;
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  const uchar *partition_info;
+  uint  partition_info_len;
+  const uchar *part_state;
+  uint part_state_len;
+  handlerton *default_part_db_type;
+#endif
 } TABLE_SHARE;

@@ -193,21 +222,22 @@ struct st_table {
   handler *file;
#ifdef NOT_YET
   struct st_table *used_next, **used_prev;	/* Link to used tables */
-  struct st_table *open_next, **open_prev;	/* Link to open tables */
#endif
+  struct st_table *open_next, **open_prev;	/* Link to open tables */
   struct st_table *next, *prev;

   THD	*in_use;			/* Which thread uses this */
   Field **field;			/* Pointer to fields */

   byte *record[2];			/* Pointer to records */
+  byte *write_row_record;		/* Used as optimisation in
+					   THD::write_row */
   byte *insert_values;			/* used by INSERT ... UPDATE */
   key_map quick_keys, used_keys, keys_in_use_for_query;
   KEY  *key_info;			/* data of keys in database */

-  Field *next_number_field,		/* Set if next_number is activated */
-	*found_next_number_field,	/* Set on open */
-	*rowid_field;
+  Field *next_number_field;		/* Set if next_number is activated */
+  Field *found_next_number_field;	/* Set on open */
   Field_timestamp *timestamp_field;

   /* Table's triggers, 0 if there are none of them */
@@ -216,6 +246,8 @@ struct st_table {
   ORDER		*group;
   const char	*alias;			/* alias or table name */
   uchar		*null_flags;
+  MY_BITMAP	*read_set;
+  MY_BITMAP	*write_set;
   query_id_t	query_id;

   ha_rows	quick_rows[MAX_KEY];
@@ -261,6 +293,7 @@ struct st_table {
   my_bool distinct,const_table,no_rows;
   my_bool key_read, no_keyread;
   my_bool locked_by_flush;
+  my_bool locked_by_logger;
   my_bool locked_by_name;
   my_bool fulltext_searched;
   my_bool no_cache;
@@ -269,12 +302,16 @@ struct st_table {
   my_bool auto_increment_field_not_null;
   my_bool insert_or_update;		/* Can be used by the handler */
   my_bool alias_name_used;		/* true if table_name is alias */
+  my_bool get_fields_in_item_tree;	/* Signal to fix_field */

   REGINFO reginfo;			/* field connections */
   MEM_ROOT mem_root;
   GRANT_INFO grant;
   FILESORT_INFO sort;
-  TABLE_SHARE share_not_to_be_used;	/* To be deleted when true shares */
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  partition_info *part_info;		/* Partition related information */
+  bool no_partitions_used; /* If true, all partitions have been pruned away */
+#endif

   bool fill_item_list(List<Item> *item_list) const;
   void reset_item_list(List<Item> *item_list) const;
@@ -291,6 +328,9 @@ typedef struct st_foreign_key_info
   List<LEX_STRING> referenced_fields;
 } FOREIGN_KEY_INFO;

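A recurring theme in the table.h hunk above: raw char* members of TABLE_SHARE (table_cache_key, db, table_name, path) become LEX_STRING, so the length travels with the pointer and the separate key_length member disappears (it now lives in table_cache_key.length). As an outline of why this matters -- a sketch of the pattern, not MySQL's actual helpers:

```cpp
#include <cstring>

// MySQL's LEX_STRING shape: a pointer plus its precomputed length.
struct lex_string_sketch
{
  char *str;
  unsigned int length;
};

// Before: only char* is stored, so every consumer calls strlen() again.
// After: measure once when the share is built, then compare with memcmp,
// which also handles the "db\0table\0" cache key that embeds a NUL byte.
static bool same_key(const lex_string_sketch &a, const lex_string_sketch &b)
{
  return a.length == b.length && memcmp(a.str, b.str, a.length) == 0;
}
```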
+/*
+  Make sure that the order of schema_tables and enum_schema_tables is the same.
+*/
 enum enum_schema_tables
 {
@@ -299,8 +339,14 @@ enum enum_schema_tables
   SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
   SCH_COLUMNS,
   SCH_COLUMN_PRIVILEGES,
+  SCH_ENGINES,
+  SCH_EVENTS,
+  SCH_FILES,
   SCH_KEY_COLUMN_USAGE,
   SCH_OPEN_TABLES,
+  SCH_PARTITIONS,
+  SCH_PLUGINS,
+  SCH_PROCESSLIST,
   SCH_PROCEDURES,
   SCH_SCHEMATA,
   SCH_SCHEMA_PRIVILEGES,
@@ -618,6 +664,7 @@ typedef struct st_table_list
   bool where_processed;
   /* FRMTYPE_ERROR if any type is acceptable */
   enum frm_type_enum required_type;
+  handlerton	*db_type;		/* table_type for handler */
   char		timestamp_buffer[20];	/* buffer for timestamp (19+1) */
   /*
     This TABLE_LIST object is just placeholder for prelocking, it will be
@@ -824,4 +871,15 @@ typedef struct st_open_table_list{
   uint32 in_use,locked;
 } OPEN_TABLE_LIST;

+typedef struct st_table_field_w_type
+{
+  LEX_STRING name;
+  LEX_STRING type;
+  LEX_STRING cset;
+} TABLE_FIELD_W_TYPE;
+
+my_bool
+table_check_intact(TABLE *table, uint table_f_count,
+                   TABLE_FIELD_W_TYPE *table_def, time_t *last_create_time,
+                   int error_num);
diff --git a/sql/time.cc b/sql/time.cc
index 5069031081d..efe1cbf1c09 100644
--- a/sql/time.cc
+++ b/sql/time.cc
@@ -711,12 +711,126 @@ void make_truncated_value_warning(THD *thd, const char *str_val,
                        type_str, str.c_ptr(), field_name,
                        (ulong) thd->row_count);
  else
-    cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
-                       ER(ER_TRUNCATED_WRONG_VALUE),
-                       type_str, str.c_ptr());
+  {
+    if (time_type > MYSQL_TIMESTAMP_ERROR)
+      cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
+                         ER(ER_TRUNCATED_WRONG_VALUE),
+                         type_str, str.c_ptr());
+    else
+      cs->cset->snprintf(cs, warn_buff, sizeof(warn_buff),
+                         ER(ER_WRONG_VALUE), type_str, str.c_ptr());
+  }
  push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
               ER_TRUNCATED_WRONG_VALUE, warn_buff);
}

+#define MAX_DAY_NUMBER 3652424L
+
+bool date_add_interval(TIME *ltime, interval_type int_type, INTERVAL interval)
+{
+  long period, sign;
+
+  ltime->neg= 0;
+
+  sign= (interval.neg ? -1 : 1);
+
+  switch (int_type) {
+  case INTERVAL_SECOND:
+  case INTERVAL_SECOND_MICROSECOND:
+  case INTERVAL_MICROSECOND:
+  case INTERVAL_MINUTE:
+  case INTERVAL_HOUR:
+  case INTERVAL_MINUTE_MICROSECOND:
+  case INTERVAL_MINUTE_SECOND:
+  case INTERVAL_HOUR_MICROSECOND:
+  case INTERVAL_HOUR_SECOND:
+  case INTERVAL_HOUR_MINUTE:
+  case INTERVAL_DAY_MICROSECOND:
+  case INTERVAL_DAY_SECOND:
+  case INTERVAL_DAY_MINUTE:
+  case INTERVAL_DAY_HOUR:
+  {
+    longlong sec, days, daynr, microseconds, extra_sec;
+    ltime->time_type= MYSQL_TIMESTAMP_DATETIME; // Return full date
+    microseconds= ltime->second_part + sign*interval.second_part;
+    extra_sec= microseconds/1000000L;
+    microseconds= microseconds%1000000L;
+
+    sec=((ltime->day-1)*3600*24L+ltime->hour*3600+ltime->minute*60+
+         ltime->second +
+         sign* (longlong) (interval.day*3600*24L +
+                           interval.hour*LL(3600)+interval.minute*LL(60)+
+                           interval.second))+ extra_sec;
+    if (microseconds < 0)
+    {
+      microseconds+= LL(1000000);
+      sec--;
+    }
+    days= sec/(3600*LL(24));
+    sec-= days*3600*LL(24);
+    if (sec < 0)
+    {
+      days--;
+      sec+= 3600*LL(24);
+    }
+    ltime->second_part= (uint) microseconds;
+    ltime->second= (uint) (sec % 60);
+    ltime->minute= (uint) (sec/60 % 60);
+    ltime->hour=   (uint) (sec/3600);
+    daynr= calc_daynr(ltime->year,ltime->month,1) + days;
+    /* Day number from year 0 to 9999-12-31 */
+    if ((ulonglong) daynr >= MAX_DAY_NUMBER)
+      goto invalid_date;
+    get_date_from_daynr((long) daynr, &ltime->year, &ltime->month,
+                        &ltime->day);
+    break;
+  }
+  case INTERVAL_DAY:
+  case INTERVAL_WEEK:
+    period= (calc_daynr(ltime->year,ltime->month,ltime->day) +
+             sign * (long) interval.day);
+    /* Daynumber from year 0 to 9999-12-31 */
+    if ((ulong) period >= MAX_DAY_NUMBER)
+      goto invalid_date;
+    get_date_from_daynr((long) period,&ltime->year,&ltime->month,&ltime->day);
+    break;
+  case INTERVAL_YEAR:
+    ltime->year+= sign * (long) interval.year;
+    if ((ulong) ltime->year >= 10000L)
+      goto invalid_date;
+    if (ltime->month == 2 && ltime->day == 29 &&
+        calc_days_in_year(ltime->year) != 366)
+      ltime->day=28;				// Was leap-year
+    break;
+  case INTERVAL_YEAR_MONTH:
+  case INTERVAL_QUARTER:
+  case INTERVAL_MONTH:
+    period= (ltime->year*12 + sign * (long) interval.year*12 +
+             ltime->month-1 + sign * (long) interval.month);
+    if ((ulong) period >= 120000L)
+      goto invalid_date;
+    ltime->year= (uint) (period / 12);
+    ltime->month= (uint) (period % 12L)+1;
+    /* Adjust day if the new month doesn't have enough days */
+    if (ltime->day > days_in_month[ltime->month-1])
+    {
+      ltime->day = days_in_month[ltime->month-1];
+      if (ltime->month == 2 && calc_days_in_year(ltime->year) == 366)
+        ltime->day++;				// Leap-year
+    }
+    break;
+  default:
+    return 1;
+  }
+  return 0;					// Ok
+
+invalid_date:
+  push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+                      ER_DATETIME_FUNCTION_OVERFLOW,
+                      ER(ER_DATETIME_FUNCTION_OVERFLOW),
+                      "datetime");
+  return 1;
+}
+
 #endif
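The INTERVAL_YEAR_MONTH/QUARTER/MONTH branch above folds years and months into a single month count, divides back out, then clamps the day to the target month with a Feb-29 leap correction. The same arithmetic as a standalone sketch (is_leap() stands in for calc_days_in_year() == 366):

```cpp
#include <cstdio>

static const unsigned char days_in_month[]=
{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

static bool is_leap(unsigned y)
{ return (y % 4 == 0 && y % 100 != 0) || y % 400 == 0; }

// Add 'months' (may be negative) to y/m/d, clamping the day the way
// date_add_interval() does; returns false past the year-9999 limit,
// mirroring the 'invalid_date' path.
static bool add_months(unsigned &y, unsigned &m, unsigned &d, long months)
{
  long period= (long) y * 12 + (long) (m - 1) + months;
  if (period < 0 || period >= 120000L)
    return false;
  y= (unsigned) (period / 12);
  m= (unsigned) (period % 12) + 1;
  if (d > days_in_month[m - 1])
  {
    d= days_in_month[m - 1];
    if (m == 2 && is_leap(y))
      d++;                              // Feb 29 is valid in a leap year
  }
  return true;
}

int main()
{
  unsigned y= 2004, m= 1, d= 31;
  if (add_months(y, m, d, 1))
    printf("%04u-%02u-%02u\n", y, m, d); // 2004-02-29 (leap year)
  return 0;
}
```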
diff --git a/sql/tztime.cc b/sql/tztime.cc
index b86c9a44561..94c1eb8ac63 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -807,6 +807,18 @@ sec_since_epoch(int year, int mon, int mday, int hour, int min ,int sec)
 }

+/*
+  Works like sec_since_epoch but expects a TIME structure as its parameter.
+*/
+
+my_time_t
+sec_since_epoch_TIME(TIME *t)
+{
+  return sec_since_epoch(t->year, t->month, t->day,
+                         t->hour, t->minute, t->second);
+}
+
+
 /*
   Converts local time in broken down TIME representation to my_time_t
   representation.
@@ -1626,7 +1638,7 @@ my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap)
    mysql.time_zone* tables are MyISAM and these operations always succeed
    for MyISAM.
  */
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);
  tz_leapcnt= 0;

  res= table->file->index_first(table->record[0]);
@@ -1803,7 +1815,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
    mysql.time_zone* tables are MyISAM and these operations always succeed
    for MyISAM.
  */
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);

  if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
                              0, HA_READ_KEY_EXACT))
@@ -1830,7 +1842,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
  table= tz_tables->table;
  tz_tables= tz_tables->next_local;
  table->field[0]->store((longlong) tzid, TRUE);
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);

  if (table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
                              0, HA_READ_KEY_EXACT))
@@ -1857,7 +1869,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
  table= tz_tables->table;
  tz_tables= tz_tables->next_local;
  table->field[0]->store((longlong) tzid, TRUE);
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);

  // FIXME Is there any better approach than explicitly specifying 4 ???
  res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
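The recurring one-character change in these hunks is the new ha_index_init(uint index, bool sorted) signature; every caller in tztime.cc now passes sorted=1. The surrounding lookup pattern, sketched with the same handler calls the hunks show (error handling trimmed; the ha_index_end() counterpart is assumed, as it does not appear in this excerpt):

```cpp
// Sketch of the lookup pattern used throughout tz_load_from_open_tables();
// the store()/index_read() calls come straight from the hunks above.
void tz_lookup_sketch(TABLE *table, longlong tzid)
{
  table->field[0]->store(tzid, TRUE);          // key value to search for
  (void) table->file->ha_index_init(0, 1);     // index 0, sorted scan
  if (!table->file->index_read(table->record[0],
                               (byte*) table->field[0]->ptr,
                               0, HA_READ_KEY_EXACT))
  {
    /* found: the matching row is now in table->record[0] */
  }
  (void) table->file->ha_index_end();          // assumed counterpart call
}
```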
@@ -1929,7 +1941,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
  */
  table= tz_tables->table;
  table->field[0]->store((longlong) tzid, TRUE);
-  (void)table->file->ha_index_init(0);
+  (void)table->file->ha_index_init(0, 1);

  // FIXME Is there any better approach than explicitly specifying 4 ???
  res= table->file->index_read(table->record[0], (byte*)table->field[0]->ptr,
diff --git a/sql/tztime.h b/sql/tztime.h
index 23460a8e739..42e50988e52 100644
--- a/sql/tztime.h
+++ b/sql/tztime.h
@@ -65,6 +65,7 @@ extern Time_zone * my_tz_find(const String *name, TABLE_LIST *tz_tables);
 extern Time_zone * my_tz_find_with_opening_tz_tables(THD *thd, const String *name);
 extern my_bool my_tz_init(THD *org_thd, const char *default_tzname, my_bool bootstrap);
 extern void my_tz_free();
+extern my_time_t sec_since_epoch_TIME(TIME *t);

 extern TABLE_LIST fake_time_zone_tables_list;

diff --git a/sql/unireg.cc b/sql/unireg.cc
index 0ab77462f61..2c5f4b34091 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -35,7 +35,7 @@ static uchar * pack_screens(List<create_field> &create_fields,
			    uint *info_length, uint *screens, bool small_file);
 static uint pack_keys(uchar *keybuff,uint key_count, KEY *key_info,
                      ulong data_offset);
-static bool pack_header(uchar *forminfo,enum db_type table_type,
+static bool pack_header(uchar *forminfo,enum legacy_db_type table_type,
			List<create_field> &create_fields,
			uint info_length, uint screens, uint table_options,
			ulong data_offset, handler *file);
@@ -43,10 +43,11 @@ static uint get_interval_id(uint *int_count,List<create_field> &create_fields,
			    create_field *last_field);
 static bool pack_fields(File file, List<create_field> &create_fields,
                        ulong data_offset);
-static bool make_empty_rec(THD *thd, int file, enum db_type table_type,
+static bool make_empty_rec(THD *thd, int file, enum legacy_db_type table_type,
			   uint table_options,
			   List<create_field> &create_fields,
-			   uint reclength, ulong data_offset);
+			   uint reclength, ulong data_offset,
+                           handler *handler);

/*
  Create a frm (table definition) file

@@ -54,7 +55,7 @@ static bool make_empty_rec(THD *thd, int file, enum db_type table_type,
  SYNOPSIS
    mysql_create_frm()
    thd			Thread handler
-    file_name		Name of file (including database and .frm)
+    file_name		Path for file (including database and .frm)
    db			Name of database
    table		Name of table
    create_info		create info parameters
@@ -62,13 +63,13 @@
    keys		number of keys to create
    key_info		Keys to create
    db_file		Handler to use. May be zero, in which case we use
-			create_info->db_type
+			create_info->db_type

  RETURN
    0  ok
    1  error
*/

-bool mysql_create_frm(THD *thd, my_string file_name,
+bool mysql_create_frm(THD *thd, const char *file_name,
                      const char *db, const char *table,
                      HA_CREATE_INFO *create_info,
                      List<create_field> &create_fields,
@@ -76,28 +77,32 @@ bool mysql_create_frm(THD *thd, my_string file_name,
                      handler *db_file)
{
  LEX_STRING str_db_type;
-  uint reclength,info_length,screens,key_info_length,maxlength;
+  uint reclength,info_length,screens,key_info_length,maxlength,i;
  ulong key_buff_length;
  File file;
  ulong filepos, data_offset;
  uchar fileinfo[64],forminfo[288],*keybuff;
  TYPELIB formnames;
  uchar *screen_buff;
-  char buff[2];
+  char buff[32];
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  partition_info *part_info= thd->lex->part_info;
+#endif
  DBUG_ENTER("mysql_create_frm");

+  DBUG_ASSERT(*fn_rext((char*)file_name)); // Check .frm extension
  formnames.type_names=0;
  if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,0)))
    DBUG_RETURN(1);
-  if (db_file == NULL)
-    db_file= get_new_handler((TABLE*) 0, thd->mem_root, create_info->db_type);
+  DBUG_ASSERT(db_file != NULL);

 /* If fixed row records, we need one bit to check for deleted rows */
  if (!(create_info->table_options & HA_OPTION_PACK_RECORD))
    create_info->null_bits++;
  data_offset= (create_info->null_bits + 7) / 8;

-  if (pack_header(forminfo, create_info->db_type,create_fields,info_length,
+  if (pack_header(forminfo, ha_legacy_type(create_info->db_type),
+                  create_fields,info_length,
		  screens, create_info->table_options, data_offset, db_file))
  {
@@ -109,7 +114,8 @@ bool mysql_create_frm(THD *thd, my_string file_name,
    thd->net.last_error[0]=0;
    if (!(screen_buff=pack_screens(create_fields,&info_length,&screens,1)))
      DBUG_RETURN(1);
-    if (pack_header(forminfo, create_info->db_type, create_fields,info_length,
+    if (pack_header(forminfo, ha_legacy_type(create_info->db_type),
+                    create_fields,info_length,
		    screens, create_info->table_options, data_offset, db_file))
    {
      my_free((gptr) screen_buff,MYF(0));
@@ -119,10 +125,26 @@ bool mysql_create_frm(THD *thd, my_string file_name,
  reclength=uint2korr(forminfo+266);

  /* Calculate extra data segment length */
-  str_db_type.str= (char *) ha_get_storage_engine(create_info->db_type);
+  str_db_type.str= (char *) ha_resolve_storage_engine_name(create_info->db_type);
  str_db_type.length= strlen(str_db_type.str);
+  /* str_db_type */
  create_info->extra_size= (2 + str_db_type.length +
                            2 + create_info->connect_string.length);
+  /* Partition */
+  create_info->extra_size+= 9;
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+  {
+    create_info->extra_size+= part_info->part_info_len;
+    create_info->extra_size+= part_info->part_state_len;
+  }
+#endif
+
+  for (i= 0; i < keys; i++)
+  {
+    if (key_info[i].parser_name)
+      create_info->extra_size+= key_info[i].parser_name->length + 1;
+  }

  if ((file=create_frm(thd, file_name, db, table, reclength, fileinfo,
		       create_info, keys)) < 0)
@@ -147,6 +169,14 @@ bool mysql_create_frm(THD *thd, my_string file_name,
	  60);
  forminfo[46]=(uchar) strlen((char*)forminfo+47);	// Length of comment

+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+  {
+    fileinfo[61]= (uchar) ha_legacy_type(part_info->default_engine_type);
+    DBUG_PRINT("info", ("part_db_type = %d", fileinfo[61]));
+  }
+#endif
+  int2store(fileinfo+59,db_file->extra_rec_buf_length());
+
  if (my_pwrite(file,(byte*) fileinfo,64,0L,MYF_RW) ||
      my_pwrite(file,(byte*) keybuff,key_info_length,
		(ulong) uint2korr(fileinfo+6),MYF_RW))
@@ -154,22 +184,55 @@ bool mysql_create_frm(THD *thd, my_string file_name,

  VOID(my_seek(file,
	       (ulong) uint2korr(fileinfo+6)+
	       (ulong) key_buff_length,
	       MY_SEEK_SET,MYF(0)));
-  if (make_empty_rec(thd,file,create_info->db_type,create_info->table_options,
-		     create_fields,reclength, data_offset))
+  if (make_empty_rec(thd,file,ha_legacy_type(create_info->db_type),
+                     create_info->table_options,
+		     create_fields,reclength, data_offset, db_file))
    goto err;

  int2store(buff, create_info->connect_string.length);
-  if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) ||
+  if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) ||
      my_write(file, (const byte*)create_info->connect_string.str,
               create_info->connect_string.length, MYF(MY_NABP)))
      goto err;

  int2store(buff, str_db_type.length);
-  if (my_write(file, (const byte*)buff, sizeof(buff), MYF(MY_NABP)) ||
+  if (my_write(file, (const byte*)buff, 2, MYF(MY_NABP)) ||
      my_write(file, (const byte*)str_db_type.str,
	       str_db_type.length, MYF(MY_NABP)))
    goto err;
-
+
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (part_info)
+  {
+    int4store(buff, part_info->part_info_len);
+    if (my_write(file, (const byte*)buff, 4, MYF_RW) ||
+        my_write(file, (const byte*)part_info->part_info_string,
+                 part_info->part_info_len + 1, MYF_RW))
+      goto err;
+    DBUG_PRINT("info", ("Part state len = %d", part_info->part_state_len));
+    int4store(buff, part_info->part_state_len);
+    if (my_write(file, (const byte*)buff, 4, MYF_RW) ||
+        my_write(file, (const byte*)part_info->part_state,
+                 part_info->part_state_len, MYF_RW))
+      goto err;
+  }
+  else
+#endif
+  {
+    bzero(buff, 9);
+    if (my_write(file, (byte*) buff, 9, MYF_RW))
+      goto err;
+  }
+  for (i= 0; i < keys; i++)
+  {
+    if (key_info[i].parser_name)
+    {
+      if (my_write(file, (const byte*)key_info[i].parser_name->str,
+                   key_info[i].parser_name->length + 1, MYF(MY_NABP)))
+        goto err;
+    }
+  }
+
  VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
  if (my_write(file,(byte*) forminfo,288,MYF_RW) ||
      my_write(file,(byte*) screen_buff,info_length,MYF_RW) ||
@@ -236,37 +299,47 @@ err3:

  SYNOPSIS
    rea_create_table()
    thd			Thread handler
-    file_name		Name of file (including database and .frm)
-    db			Name of database
-    table		Name of table
+    path		Name of file (including database, without .frm)
+    db			Database name
+    table_name		Table name
    create_info		create info parameters
    create_fields	Fields to create
    keys		number of keys to create
    key_info		Keys to create
-    db_file		Handler to use. May be zero, in which case we use
-			create_info->db_type
+    file		Handler to use
+
  RETURN
    0  ok
    1  error
*/

-int rea_create_table(THD *thd, my_string file_name,
-		     const char *db, const char *table,
-		     HA_CREATE_INFO *create_info,
-		     List<create_field> &create_fields,
-		     uint keys, KEY *key_info)
+int rea_create_table(THD *thd, const char *path,
+                     const char *db, const char *table_name,
+                     HA_CREATE_INFO *create_info,
+                     List<create_field> &create_fields,
+                     uint keys, KEY *key_info, handler *file)
{
  DBUG_ENTER("rea_create_table");

-  if (mysql_create_frm(thd, file_name, db, table, create_info,
-		       create_fields, keys, key_info, NULL))
-    DBUG_RETURN(1);
-  if (!create_info->frm_only && ha_create_table(file_name,create_info,0))
-  {
-    my_delete(file_name,MYF(0));
+  char frm_name[FN_REFLEN];
+  strxmov(frm_name, path, reg_ext, NullS);
+  if (mysql_create_frm(thd, frm_name, db, table_name, create_info,
+                       create_fields, keys, key_info, file))
+
    DBUG_RETURN(1);
-  }
+
+  // Make sure mysql_create_frm didn't remove extension
+  DBUG_ASSERT(*fn_rext(frm_name));
+  if (file->create_handler_files(path))
+    goto err_handler;
+  if (!create_info->frm_only && ha_create_table(thd, path, db, table_name,
+                                                create_info,0))
+    goto err_handler;
  DBUG_RETURN(0);
+
+err_handler:
+  my_delete(frm_name, MYF(0));
+  DBUG_RETURN(1);
} /* rea_create_table */
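The new rea_create_table() separates its three failure points -- writing the .frm, creating handler-specific files, creating the table in the engine -- behind one err_handler label that removes the .frm, the only on-disk artifact it owns. The control flow reduced to a self-contained sketch; the step_*() names are placeholders for mysql_create_frm(), create_handler_files() and ha_create_table():

```cpp
#include <cstdio>

// Placeholders for the three real steps; false == success.
static bool step_write_frm()     { return false; }
static bool step_handler_files() { return false; }
static bool step_create_table()  { return false; }
static void remove_frm()         { puts("my_delete(frm_name)"); }

int rea_create_table_sketch()
{
  if (step_write_frm())
    return 1;               // nothing on disk yet, nothing to undo
  if (step_handler_files())
    goto err_handler;
  if (step_create_table())
    goto err_handler;
  return 0;

err_handler:
  remove_frm();             // the .frm is the only artifact to roll back
  return 1;
}
```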
@@ -421,7 +494,7 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,

/* Make formheader */

-static bool pack_header(uchar *forminfo, enum db_type table_type,
+static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
			List<create_field> &create_fields,
                        uint info_length, uint screens, uint table_options,
                        ulong data_offset, handler *file)
@@ -680,31 +753,30 @@ static bool pack_fields(File file, List<create_field> &create_fields,

/* save an empty record on start of formfile */

-static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
+static bool make_empty_rec(THD *thd, File file,enum legacy_db_type table_type,
			   uint table_options,
			   List<create_field> &create_fields,
			   uint reclength,
-			   ulong data_offset)
+                           ulong data_offset,
+                           handler *handler)
{
-  int error;
+  int error= 0;
  Field::utype type;
  uint null_count;
  uchar *buff,*null_pos;
  TABLE table;
+  TABLE_SHARE share;
  create_field *field;
-  handler *handler;
  enum_check_fields old_count_cuted_fields= thd->count_cuted_fields;
  DBUG_ENTER("make_empty_rec");

  /* We need a table to generate columns for default values */
-  bzero((char*) &table,sizeof(table));
-  table.s= &table.share_not_to_be_used;
-  handler= get_new_handler((TABLE*) 0, thd->mem_root, table_type);
+  bzero((char*) &table, sizeof(table));
+  bzero((char*) &share, sizeof(share));
+  table.s= &share;

-  if (!handler ||
-      !(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
+  if (!(buff=(uchar*) my_malloc((uint) reclength,MYF(MY_WME | MY_ZEROFILL))))
  {
-    delete handler;
    DBUG_RETURN(1);
  }

@@ -727,21 +799,24 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
    /*
      regfield doesn't have to be deleted as it's allocated with sql_alloc()
    */
-    Field *regfield=make_field((char*) buff+field->offset + data_offset,
-                               field->length,
-                               null_pos + null_count / 8,
-                               null_count & 7,
-                               field->pack_flag,
-                               field->sql_type,
-                               field->charset,
-                               field->geom_type,
-                               field->unireg_check,
-                               field->interval,
-                               field->field_name,
-                               &table);
+    Field *regfield= make_field(&share,
+                                (char*) buff+field->offset + data_offset,
+                                field->length,
+                                null_pos + null_count / 8,
+                                null_count & 7,
+                                field->pack_flag,
+                                field->sql_type,
+                                field->charset,
+                                field->geom_type,
+                                field->unireg_check,
+                                field->interval,
+                                field->field_name);
    if (!regfield)
      goto err;					// End of memory

+    /* save_in_field() will access regfield->table->in_use */
+    regfield->init(&table);
+
    if (!(field->flags & NOT_NULL_FLAG))
    {
      *regfield->null_ptr|= regfield->null_bit;
@@ -761,6 +836,7 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,
      {
        my_error(ER_INVALID_DEFAULT, MYF(0), regfield->field_name);
        error= 1;
+        delete regfield; // To avoid memory leak
        goto err;
      }
    }
@@ -790,7 +866,6 @@ static bool make_empty_rec(THD *thd, File file,enum db_type table_type,

err:
  my_free((gptr) buff,MYF(MY_FAE));
-  delete handler;
  thd->count_cuted_fields= old_count_cuted_fields;
  DBUG_RETURN(error);
} /* make_empty_rec */
diff --git a/sql/unireg.h b/sql/unireg.h
index b932a2f320c..9ab8753af84 100644
--- a/sql/unireg.h
+++ b/sql/unireg.h
@@ -36,6 +36,9 @@
 #ifndef SHAREDIR
 #define SHAREDIR	"share/"
 #endif
+#ifndef LIBDIR
+#define LIBDIR		"lib/"
+#endif

 #define ER(X) errmesg[(X) - ER_ERROR_FIRST]
 #define ER_SAFE(X) (((X) >= ER_ERROR_FIRST && (X) <= ER_ERROR_LAST) ? ER(X) : "Invalid error code")
@@ -80,6 +83,7 @@
 #define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
                            RAND_TABLE_BIT)
 #define MAX_FIELDS	4096			/* Limit in the .frm file */
+#define MAX_PARTITIONS  1024

 #define MAX_SORT_MEMORY (2048*1024-MALLOC_OVERHEAD)
 #define MIN_SORT_MEMORY (32*1024-MALLOC_OVERHEAD)
@@ -146,13 +150,13 @@
 #define DONT_GIVE_ERROR		256	/* Don't do frm_error on openfrm */
 #define READ_SCREENS		1024	/* Read screens, info and helpfile */
 #define DELAYED_OPEN		4096	/* Open table later */
-#define NO_ERR_ON_NEW_FRM	8192	/* stop error sending on new format */
+#define OPEN_VIEW		8192	/* Allow open on view */

 #define SC_INFO_LENGTH 4		/* Form format constant */
 #define TE_INFO_LENGTH 3
 #define MTYP_NOEMPTY_BIT 128

-#define FRM_VER_TRUE_VARCHAR (FRM_VER+4)
+#define FRM_VER_TRUE_VARCHAR (FRM_VER+4) /* 10 */

/*
  Minimum length pattern before Turbo Boyer-Moore is used
  for SELECT "text" LIKE "%pattern%", excluding the two |